summaryrefslogtreecommitdiffstats
path: root/api/src
diff options
context:
space:
mode:
Diffstat (limited to 'api/src')
-rw-r--r--api/src/Makefile.am14
-rw-r--r--api/src/glfs-mgmt.c16
-rw-r--r--api/src/glfs.h8
3 files changed, 25 insertions, 13 deletions
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
index 527b12d0850..872868aab0c 100644
--- a/api/src/Makefile.am
+++ b/api/src/Makefile.am
@@ -12,10 +12,13 @@ libgfapi_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(GF_LDADD)
-libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
- -I$(top_srcdir)/libglusterfs/src \
- -I$(top_srcdir)/rpc/rpc-lib/src \
- -I$(top_srcdir)/rpc/xdr/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -DDATADIR=\"$(localstatedir)\" \
+ -D__USE_FILE_OFFSET64
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
libgfapi_la_LDFLAGS = -version-info $(GFAPI_LT_VERSION) \
$(GFAPI_EXTRA_LDFLAGS) $(ACL_LIBS)
@@ -35,6 +38,3 @@ api_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
$(top_builddir)/api/src/libgfapi.la
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
-
-AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
index 48fd618ff68..593611612b8 100644
--- a/api/src/glfs-mgmt.c
+++ b/api/src/glfs-mgmt.c
@@ -855,11 +855,21 @@ glfs_mgmt_init (struct glfs *fs)
if (cmd_args->volfile_server_port)
port = cmd_args->volfile_server_port;
- host = "localhost";
- if (cmd_args->volfile_server)
+ if (cmd_args->volfile_server) {
host = cmd_args->volfile_server;
+ } else if (cmd_args->volfile_server_transport &&
+ !strcmp (cmd_args->volfile_server_transport, "unix")) {
+ host = DEFAULT_GLUSTERD_SOCKFILE;
+ } else {
+ host = "localhost";
+ }
+
+ if (!strcmp (cmd_args->volfile_server_transport, "unix")) {
+ ret = rpc_transport_unix_options_build (&options, host, 0);
+ } else {
+ ret = rpc_transport_inet_options_build (&options, host, port);
+ }
- ret = rpc_transport_inet_options_build (&options, host, port);
if (ret)
goto out;
diff --git a/api/src/glfs.h b/api/src/glfs.h
index b073d8d1b17..d3bf1b4cdcc 100644
--- a/api/src/glfs.h
+++ b/api/src/glfs.h
@@ -164,12 +164,14 @@ int glfs_set_volfile (glfs_t *fs, const char *volfile) __THROW
management daemon. Specifying NULL will result in the usage
of the default (tcp) transport type. Permitted values
are those that you specify as transport-type in a volume
- specification file (e.g "tcp", "rdma" etc.)
+ specification file (e.g "tcp", "rdma", "unix" etc.)
@host: String specifying the address where to find the management daemon.
+ This is the socket path when a Unix domain socket is used as the transport type.
This would either be
- - FQDN (e.g: "storage01.company.com") or
- - ASCII (e.g: "192.168.22.1")
+ - FQDN (e.g : "storage01.company.com") or
+ - ASCII (e.g : "192.168.22.1") or
+ - Socket path (e.g : "/var/run/glusterd.socket")
NOTE: This API is special, multiple calls to this function with different
volfile servers, port or transport-type would create a list of volfile
/glfs.c673
-rw-r--r--api/src/glfs.h581
-rw-r--r--argp-standalone/configure.ac4
-rwxr-xr-xautogen.sh107
-rw-r--r--booster/Makefile.am1
-rw-r--r--booster/src/Makefile.am21
-rw-r--r--booster/src/booster-fd.c342
-rw-r--r--booster/src/booster-fd.h83
-rw-r--r--booster/src/booster.c3172
-rw-r--r--booster/src/booster_fstab.c452
-rw-r--r--booster/src/booster_fstab.h83
-rw-r--r--booster/src/booster_stat.c188
-rw-r--r--cli/src/Makefile.am15
-rw-r--r--cli/src/cli-cmd-misc.c29
-rw-r--r--cli/src/cli-cmd-parser.c1717
-rw-r--r--cli/src/cli-cmd-peer.c78
-rw-r--r--cli/src/cli-cmd-snapshot.c146
-rw-r--r--cli/src/cli-cmd-system.c287
-rw-r--r--cli/src/cli-cmd-volume.c330
-rw-r--r--cli/src/cli-cmd.c37
-rw-r--r--cli/src/cli-cmd.h36
-rw-r--r--cli/src/cli-mem-types.h20
-rw-r--r--cli/src/cli-rl.c32
-rw-r--r--cli/src/cli-rpc-ops.c5116
-rw-r--r--cli/src/cli-xml-output.c1604
-rw-r--r--cli/src/cli.c175
-rw-r--r--cli/src/cli.h109
-rw-r--r--cli/src/input.c30
-rw-r--r--cli/src/registry.c24
-rw-r--r--configure.ac707
-rw-r--r--contrib/aclocal/mkdirp.m4146
-rw-r--r--contrib/aclocal/python.m4209
-rw-r--r--contrib/fuse-include/fuse-mount.h3
-rw-r--r--contrib/fuse-include/fuse_kernel.h537
-rw-r--r--contrib/fuse-lib/mount-gluster-compat.h1
-rw-r--r--contrib/fuse-lib/mount.c37
-rw-r--r--contrib/fuse-util/Makefile.am4
-rw-r--r--contrib/fuse-util/fusermount.c11
-rw-r--r--contrib/libgen/basename_r.c6
-rw-r--r--contrib/libgen/dirname_r.c4
-rw-r--r--contrib/qemu/block.c4604
-rw-r--r--contrib/qemu/block/qcow.c914
-rw-r--r--contrib/qemu/block/qcow2-cache.c323
-rw-r--r--contrib/qemu/block/qcow2-cluster.c1478
-rw-r--r--contrib/qemu/block/qcow2-refcount.c1374
-rw-r--r--contrib/qemu/block/qcow2-snapshot.c660
-rw-r--r--contrib/qemu/block/qcow2.c1825
-rw-r--r--contrib/qemu/block/qcow2.h437
-rw-r--r--contrib/qemu/block/qed-check.c248
-rw-r--r--contrib/qemu/block/qed-cluster.c165
-rw-r--r--contrib/qemu/block/qed-gencb.c32
-rw-r--r--contrib/qemu/block/qed-l2-cache.c187
-rw-r--r--contrib/qemu/block/qed-table.c296
-rw-r--r--contrib/qemu/block/qed.c1596
-rw-r--r--contrib/qemu/block/qed.h344
-rw-r--r--contrib/qemu/block/snapshot.c157
-rw-r--r--contrib/qemu/config-host.h73
-rw-r--r--contrib/qemu/coroutine-ucontext.c225
-rw-r--r--contrib/qemu/include/block/aio.h247
-rw-r--r--contrib/qemu/include/block/block.h443
-rw-r--r--contrib/qemu/include/block/block_int.h421
-rw-r--r--contrib/qemu/include/block/blockjob.h278
-rw-r--r--contrib/qemu/include/block/coroutine.h218
-rw-r--r--contrib/qemu/include/block/coroutine_int.h53
-rw-r--r--contrib/qemu/include/block/snapshot.h53
-rw-r--r--contrib/qemu/include/config.h2
-rw-r--r--contrib/qemu/include/exec/cpu-common.h124
-rw-r--r--contrib/qemu/include/exec/hwaddr.h20
-rw-r--r--contrib/qemu/include/exec/poison.h63
-rw-r--r--contrib/qemu/include/fpu/softfloat.h641
-rw-r--r--contrib/qemu/include/glib-compat.h27
-rw-r--r--contrib/qemu/include/migration/migration.h157
-rw-r--r--contrib/qemu/include/migration/qemu-file.h266
-rw-r--r--contrib/qemu/include/migration/vmstate.h740
-rw-r--r--contrib/qemu/include/monitor/monitor.h104
-rw-r--r--contrib/qemu/include/monitor/readline.h55
-rw-r--r--contrib/qemu/include/qapi/error.h85
-rw-r--r--contrib/qemu/include/qapi/qmp/json-lexer.h51
-rw-r--r--contrib/qemu/include/qapi/qmp/json-parser.h24
-rw-r--r--contrib/qemu/include/qapi/qmp/json-streamer.h40
-rw-r--r--contrib/qemu/include/qapi/qmp/qbool.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qdict.h69
-rw-r--r--contrib/qemu/include/qapi/qmp/qerror.h249
-rw-r--r--contrib/qemu/include/qapi/qmp/qfloat.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qint.h28
-rw-r--r--contrib/qemu/include/qapi/qmp/qjson.h29
-rw-r--r--contrib/qemu/include/qapi/qmp/qlist.h63
-rw-r--r--contrib/qemu/include/qapi/qmp/qobject.h112
-rw-r--r--contrib/qemu/include/qapi/qmp/qstring.h36
-rw-r--r--contrib/qemu/include/qapi/qmp/types.h25
-rw-r--r--contrib/qemu/include/qemu-common.h478
-rw-r--r--contrib/qemu/include/qemu/aes.h45
-rw-r--r--contrib/qemu/include/qemu/atomic.h202
-rw-r--r--contrib/qemu/include/qemu/bitmap.h222
-rw-r--r--contrib/qemu/include/qemu/bitops.h276
-rw-r--r--contrib/qemu/include/qemu/bswap.h478
-rw-r--r--contrib/qemu/include/qemu/compiler.h55
-rw-r--r--contrib/qemu/include/qemu/error-report.h46
-rw-r--r--contrib/qemu/include/qemu/event_notifier.h46
-rw-r--r--contrib/qemu/include/qemu/hbitmap.h209
-rw-r--r--contrib/qemu/include/qemu/host-utils.h322
-rw-r--r--contrib/qemu/include/qemu/iov.h115
-rw-r--r--contrib/qemu/include/qemu/main-loop.h311
-rw-r--r--contrib/qemu/include/qemu/module.h40
-rw-r--r--contrib/qemu/include/qemu/notify.h72
-rw-r--r--contrib/qemu/include/qemu/option.h157
-rw-r--r--contrib/qemu/include/qemu/option_int.h54
-rw-r--r--contrib/qemu/include/qemu/osdep.h218
-rw-r--r--contrib/qemu/include/qemu/queue.h414
-rw-r--r--contrib/qemu/include/qemu/sockets.h83
-rw-r--r--contrib/qemu/include/qemu/thread-posix.h28
-rw-r--r--contrib/qemu/include/qemu/thread.h56
-rw-r--r--contrib/qemu/include/qemu/timer.h305
-rw-r--r--contrib/qemu/include/qemu/typedefs.h69
-rw-r--r--contrib/qemu/include/sysemu/os-posix.h52
-rw-r--r--contrib/qemu/include/sysemu/sysemu.h200
-rw-r--r--contrib/qemu/include/trace.h6
-rw-r--r--contrib/qemu/nop-symbols.c12
-rw-r--r--contrib/qemu/qapi-types.h2746
-rw-r--r--contrib/qemu/qemu-coroutine-lock.c178
-rw-r--r--contrib/qemu/qemu-coroutine-sleep.c39
-rw-r--r--contrib/qemu/qemu-coroutine.c135
-rw-r--r--contrib/qemu/qmp-commands.h204
-rw-r--r--contrib/qemu/qobject/json-lexer.c373
-rw-r--r--contrib/qemu/qobject/json-parser.c724
-rw-r--r--contrib/qemu/qobject/json-streamer.c122
-rw-r--r--contrib/qemu/qobject/qbool.c68
-rw-r--r--contrib/qemu/qobject/qdict.c478
-rw-r--r--contrib/qemu/qobject/qerror.c156
-rw-r--r--contrib/qemu/qobject/qfloat.c68
-rw-r--r--contrib/qemu/qobject/qint.c67
-rw-r--r--contrib/qemu/qobject/qjson.c282
-rw-r--r--contrib/qemu/qobject/qlist.c170
-rw-r--r--contrib/qemu/qobject/qstring.c149
-rw-r--r--contrib/qemu/trace/generated-tracers.h3759
-rw-r--r--contrib/qemu/util/aes.c1314
-rw-r--r--contrib/qemu/util/bitmap.c256
-rw-r--r--contrib/qemu/util/bitops.c158
-rw-r--r--contrib/qemu/util/cutils.c532
-rw-r--r--contrib/qemu/util/error.c120
-rw-r--r--contrib/qemu/util/hbitmap.c402
-rw-r--r--contrib/qemu/util/hexdump.c37
-rw-r--r--contrib/qemu/util/iov.c426
-rw-r--r--contrib/qemu/util/module.c81
-rw-r--r--contrib/qemu/util/oslib-posix.c243
-rw-r--r--contrib/qemu/util/qemu-error.c225
-rw-r--r--contrib/qemu/util/qemu-option.c1126
-rw-r--r--contrib/qemu/util/qemu-thread-posix.c327
-rw-r--r--contrib/qemu/util/unicode.c100
-rw-r--r--contrib/stdlib/gf_mkostemp.c107
-rw-r--r--doc/Makefile.am7
-rw-r--r--doc/admin-guide/en-US/Administration_Guide.ent4
-rw-r--r--doc/admin-guide/en-US/Administration_Guide.xml27
-rw-r--r--doc/admin-guide/en-US/Author_Group.xml17
-rw-r--r--doc/admin-guide/en-US/Book_Info.xml28
-rw-r--r--doc/admin-guide/en-US/Chapter.xml33
-rw-r--r--doc/admin-guide/en-US/Preface.xml24
-rw-r--r--doc/admin-guide/en-US/Revision_History.xml27
-rw-r--r--doc/admin-guide/en-US/admin_ACLs.xml206
-rw-r--r--doc/admin-guide/en-US/admin_Hadoop.xml244
-rw-r--r--doc/admin-guide/en-US/admin_UFO.xml1588
-rw-r--r--doc/admin-guide/en-US/admin_commandref.xml334
-rw-r--r--doc/admin-guide/en-US/admin_console.xml28
-rw-r--r--doc/admin-guide/en-US/admin_directory_Quota.xml179
-rw-r--r--doc/admin-guide/en-US/admin_geo-replication.xml732
-rw-r--r--doc/admin-guide/en-US/admin_managing_volumes.xml735
-rw-r--r--doc/admin-guide/en-US/admin_monitoring_workload.xml878
-rw-r--r--doc/admin-guide/en-US/admin_setting_volumes.xml325
-rw-r--r--doc/admin-guide/en-US/admin_settingup_clients.xml511
-rw-r--r--doc/admin-guide/en-US/admin_start_stop_daemon.xml56
-rw-r--r--doc/admin-guide/en-US/admin_storage_pools.xml57
-rw-r--r--doc/admin-guide/en-US/admin_troubleshooting.xml509
-rw-r--r--doc/admin-guide/en-US/gfs_introduction.xml54
-rw-r--r--doc/admin-guide/en-US/glossary.xml126
-rw-r--r--doc/admin-guide/en-US/images/640px-GlusterFS_Architecture.png (renamed from doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png)bin97477 -> 97477 bytes-rw-r--r--doc/admin-guide/en-US/images/GlusterFS_Architecture.png (renamed from doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png)bin133597 -> 133597 bytes-rw-r--r--doc/admin-guide/en-US/images/arhitecture.png13
-rw-r--r--doc/admin-guide/en-US/markdown/Administration_Guide.md1
-rw-r--r--doc/admin-guide/en-US/markdown/Author_Group.md5
-rw-r--r--doc/admin-guide/en-US/markdown/Book_Info.md1
-rw-r--r--doc/admin-guide/en-US/markdown/Chapter.md18
-rw-r--r--doc/admin-guide/en-US/markdown/Preface.md22
-rw-r--r--doc/admin-guide/en-US/markdown/Revision_History.md4
-rw-r--r--doc/admin-guide/en-US/markdown/admin_ACLs.md197
-rw-r--r--doc/admin-guide/en-US/markdown/admin_Hadoop.md170
-rw-r--r--doc/admin-guide/en-US/markdown/admin_UFO.md1219
-rw-r--r--doc/admin-guide/en-US/markdown/admin_commandref.md180
-rw-r--r--doc/admin-guide/en-US/markdown/admin_console.md51
-rw-r--r--doc/admin-guide/en-US/markdown/admin_directory_Quota.md172
-rw-r--r--doc/admin-guide/en-US/markdown/admin_geo-replication.md738
-rw-r--r--doc/admin-guide/en-US/markdown/admin_managing_snapshots.md66
-rw-r--r--doc/admin-guide/en-US/markdown/admin_managing_volumes.md710
-rw-r--r--doc/admin-guide/en-US/markdown/admin_monitoring_workload.md931
-rw-r--r--doc/admin-guide/en-US/markdown/admin_setting_volumes.md419
-rw-r--r--doc/admin-guide/en-US/markdown/admin_settingup_clients.md641
-rw-r--r--doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md70
-rw-r--r--doc/admin-guide/en-US/markdown/admin_storage_pools.md73
-rw-r--r--doc/admin-guide/en-US/markdown/admin_troubleshooting.md543
-rw-r--r--doc/admin-guide/en-US/markdown/gfs_introduction.md50
-rw-r--r--doc/admin-guide/en-US/markdown/glossary.md134
-rw-r--r--doc/admin-guide/publican.cfg12
-rw-r--r--doc/authentication.txt (renamed from doc/legacy/authentication.txt)0
-rw-r--r--doc/coding-standard.pdf (renamed from doc/legacy/coding-standard.pdf)bin68627 -> 68627 bytes-rw-r--r--doc/coding-standard.tex (renamed from doc/legacy/coding-standard.tex)3
-rw-r--r--doc/examples/legacy/Makefile.am8
-rw-r--r--doc/examples/legacy/README13
-rw-r--r--doc/examples/legacy/filter.vol23
-rw-r--r--doc/examples/legacy/io-cache.vol31
-rw-r--r--doc/examples/legacy/io-threads.vol22
-rw-r--r--doc/examples/legacy/posix-locks.vol19
-rw-r--r--doc/examples/legacy/protocol-client.vol12
-rw-r--r--doc/examples/legacy/protocol-server.vol21
-rw-r--r--doc/examples/legacy/read-ahead.vol24
-rw-r--r--doc/examples/legacy/replicate.vol118
-rw-r--r--doc/examples/legacy/stripe.vol120
-rw-r--r--doc/examples/legacy/trace.vol21
-rw-r--r--doc/examples/legacy/trash.vol20
-rw-r--r--doc/examples/legacy/write-behind.vol27
-rw-r--r--doc/features/rdma-cm-in-3.4.0.txt9
-rw-r--r--doc/features/rebalance.md74
-rw-r--r--doc/gluster.821
-rw-r--r--doc/glusterd.821
-rw-r--r--doc/glusterfs.887
-rw-r--r--doc/glusterfs.vol.sample53
-rw-r--r--doc/glusterfsd.823
-rw-r--r--doc/glusterfsd.vol.sample44
-rw-r--r--doc/hacker-guide/en-US/markdown/adding-fops.md18
-rw-r--r--doc/hacker-guide/en-US/markdown/afr.md191
-rw-r--r--doc/hacker-guide/en-US/markdown/coding-standard.md402
-rw-r--r--doc/hacker-guide/en-US/markdown/posix.md59
-rw-r--r--doc/hacker-guide/en-US/markdown/translator-development.md666
-rw-r--r--doc/hacker-guide/en-US/markdown/write-behind.md56
-rw-r--r--doc/legacy/Makefile.am (renamed from doc/user-guide/legacy/Makefile.am)0
-rw-r--r--doc/legacy/advanced-stripe.odg (renamed from doc/user-guide/legacy/advanced-stripe.odg)bin12648 -> 12648 bytes-rw-r--r--doc/legacy/advanced-stripe.pdf (renamed from doc/user-guide/legacy/advanced-stripe.pdf)bin13382 -> 13382 bytes-rw-r--r--doc/legacy/colonO-icon.jpg (renamed from doc/user-guide/legacy/colonO-icon.jpg)bin779 -> 779 bytes-rw-r--r--doc/legacy/fdl.texi (renamed from doc/user-guide/legacy/fdl.texi)0
-rw-r--r--doc/legacy/fuse.odg (renamed from doc/user-guide/legacy/fuse.odg)bin13190 -> 13190 bytes-rw-r--r--doc/legacy/fuse.pdf (renamed from doc/user-guide/legacy/fuse.pdf)bin14948 -> 14948 bytes-rw-r--r--doc/legacy/ha.odg (renamed from doc/user-guide/legacy/ha.odg)bin37290 -> 37290 bytes-rw-r--r--doc/legacy/ha.pdf (renamed from doc/user-guide/legacy/ha.pdf)bin19403 -> 19403 bytes-rw-r--r--doc/legacy/hacker-guide/adding-fops.txt33
-rw-r--r--doc/legacy/hacker-guide/bdb.txt70
-rw-r--r--doc/legacy/hacker-guide/lock-ahead.txt80
-rw-r--r--doc/legacy/hacker-guide/posix.txt59
-rw-r--r--doc/legacy/hacker-guide/write-behind.txt45
-rw-r--r--doc/legacy/stripe.odg (renamed from doc/user-guide/legacy/stripe.odg)bin10188 -> 10188 bytes-rw-r--r--doc/legacy/stripe.pdf (renamed from doc/user-guide/legacy/stripe.pdf)bin11941 -> 11941 bytes-rw-r--r--doc/legacy/unify.odg (renamed from doc/user-guide/legacy/unify.odg)bin12955 -> 12955 bytes-rw-r--r--doc/legacy/unify.pdf (renamed from doc/user-guide/legacy/unify.pdf)bin18969 -> 18969 bytes-rw-r--r--doc/legacy/user-guide.info (renamed from doc/user-guide/legacy/user-guide.info)8
-rw-r--r--doc/legacy/user-guide.pdf (renamed from doc/user-guide/legacy/user-guide.pdf)bin353986 -> 353986 bytes-rw-r--r--doc/legacy/user-guide.texi (renamed from doc/user-guide/legacy/user-guide.texi)8
-rw-r--r--doc/legacy/xlator.odg (renamed from doc/user-guide/legacy/xlator.odg)bin12169 -> 12169 bytes-rw-r--r--doc/legacy/xlator.pdf (renamed from doc/user-guide/legacy/xlator.pdf)bin14358 -> 14358 bytes-rw-r--r--doc/logging.txt66
-rw-r--r--doc/mount.glusterfs.821
-rw-r--r--doc/qa/legacy/qa-client.vol170
-rw-r--r--doc/qa/legacy/qa-high-avail-client.vol17
-rw-r--r--doc/qa/legacy/qa-high-avail-server.vol344
-rw-r--r--doc/qa/legacy/qa-server.vol284
-rw-r--r--doc/rpc-for-glusterfs.changes-done.txt (renamed from doc/legacy/rpc-for-glusterfs.changes-done.txt)0
-rw-r--r--doc/split-brain.md251
-rw-r--r--error-codes.json4
-rw-r--r--extras/LinuxRPM/Makefile.am57
-rw-r--r--extras/Makefile.am17
-rw-r--r--extras/benchmarking/Makefile.am4
-rw-r--r--extras/benchmarking/glfs-bm.c25
-rw-r--r--extras/benchmarking/rdd.c24
-rwxr-xr-xextras/generate-xdr-files.sh19
-rw-r--r--extras/geo-rep/Makefile.am2
-rw-r--r--extras/geo-rep/generate-gfid-file.sh53
-rwxr-xr-xextras/geo-rep/get-gfid.sh7
-rw-r--r--extras/geo-rep/gsync-sync-gfid.c106
-rw-r--r--extras/geo-rep/gsync-upgrade.sh123
-rw-r--r--extras/geo-rep/slave-upgrade.sh102
-rw-r--r--extras/gluster-rsyslog-5.8.conf51
-rw-r--r--extras/gluster-rsyslog-7.2.conf76
-rw-r--r--extras/glusterd-sysconfig6
-rw-r--r--extras/glusterd.vol (renamed from doc/glusterd.vol)3
-rw-r--r--extras/glusterfs-georep-logrotate18
-rw-r--r--extras/glusterfs.vim21
-rw-r--r--extras/gnfs-loganalyse.py18
-rw-r--r--extras/group-virt.example6
-rw-r--r--extras/hook-scripts/Makefile.am2
-rwxr-xr-xextras/hook-scripts/S30samba-set.sh109
-rwxr-xr-xextras/hook-scripts/S30samba-start.sh96
-rwxr-xr-xextras/hook-scripts/S30samba-stop.sh55
-rwxr-xr-xextras/hook-scripts/S40ufo-stop.py24
-rwxr-xr-xextras/hook-scripts/S56glusterd-geo-rep-create-post.sh42
-rw-r--r--extras/init.d/Makefile.am17
-rwxr-xr-xextras/init.d/glusterd-Redhat.in114
-rwxr-xr-xextras/init.d/glusterd-SuSE.in10
-rwxr-xr-xextras/init.d/rhel5-load-fuse.modules7
-rw-r--r--extras/logger.conf.example13
-rw-r--r--extras/ocf/Makefile.am11
-rwxr-xr-xextras/ocf/glusterd.in212
-rwxr-xr-xextras/ocf/volume.in246
-rwxr-xr-xextras/profiler/glusterfs-profiler23
-rwxr-xr-xextras/prot_filter.py144
-rwxr-xr-xextras/rebalance.py299
-rw-r--r--extras/stripe-merge.c492
-rw-r--r--extras/systemd/Makefile.am11
-rw-r--r--extras/systemd/glusterd.service.in14
-rwxr-xr-xextras/test/bug-920583.t50
-rwxr-xr-xextras/test/gluster_commands.sh40
-rw-r--r--extras/test/ld-preload-test/ld-preload-lib.c20
-rw-r--r--extras/test/ld-preload-test/ld-preload-test.c20
-rwxr-xr-xextras/test/run.sh23
-rwxr-xr-xextras/test/stop_glusterd.sh23
-rw-r--r--extras/test/test-ffop.c779
-rw-r--r--extras/volfilter.py167
-rw-r--r--extras/who-wrote-glusterfs/gitdm.aliases48
-rw-r--r--extras/who-wrote-glusterfs/gitdm.config8
-rw-r--r--extras/who-wrote-glusterfs/gitdm.domain-map15
-rwxr-xr-xextras/who-wrote-glusterfs/who-wrote-glusterfs.sh50
-rwxr-xr-xgen-headers.py54
-rw-r--r--geo-replication/Makefile.am (renamed from xlators/features/marker/utils/Makefile.am)0
-rw-r--r--geo-replication/src/Makefile.am33
-rw-r--r--geo-replication/src/gsyncd.c (renamed from xlators/features/marker/utils/src/gsyncd.c)128
-rwxr-xr-xgeo-replication/src/gverify.sh160
-rw-r--r--geo-replication/src/peer_add_secret_pub.in9
-rwxr-xr-xgeo-replication/src/peer_gsec_create.in12
-rw-r--r--geo-replication/src/procdiggy.c (renamed from xlators/features/marker/utils/src/procdiggy.c)28
-rw-r--r--geo-replication/src/procdiggy.h20
-rw-r--r--geo-replication/syncdaemon/Makefile.am7
-rw-r--r--geo-replication/syncdaemon/README.md (renamed from xlators/features/marker/utils/syncdaemon/README.md)39
-rw-r--r--geo-replication/syncdaemon/__codecheck.py (renamed from xlators/features/marker/utils/syncdaemon/__codecheck.py)0
-rw-r--r--geo-replication/syncdaemon/__init__.py (renamed from xlators/features/marker/utils/syncdaemon/__init__.py)0
-rw-r--r--geo-replication/syncdaemon/configinterface.py (renamed from xlators/features/marker/utils/syncdaemon/configinterface.py)4
-rw-r--r--geo-replication/syncdaemon/gconf.py (renamed from xlators/features/marker/utils/syncdaemon/gconf.py)5
-rw-r--r--geo-replication/syncdaemon/gsyncd.py (renamed from xlators/features/marker/utils/syncdaemon/gsyncd.py)234
-rw-r--r--geo-replication/syncdaemon/libcxattr.py (renamed from xlators/features/marker/utils/syncdaemon/libcxattr.py)15
-rw-r--r--geo-replication/syncdaemon/libgfchangelog.py64
-rw-r--r--geo-replication/syncdaemon/master.py1022
-rw-r--r--geo-replication/syncdaemon/monitor.py244
-rw-r--r--geo-replication/syncdaemon/repce.py (renamed from xlators/features/marker/utils/syncdaemon/repce.py)0
-rw-r--r--geo-replication/syncdaemon/resource.py (renamed from xlators/features/marker/utils/syncdaemon/resource.py)326
-rw-r--r--geo-replication/syncdaemon/syncdutils.py (renamed from xlators/features/marker/utils/syncdaemon/syncdutils.py)168
-rw-r--r--gf-error-codes.h.template33
-rw-r--r--glusterfs-api.pc.in12
-rw-r--r--glusterfs.spec.in919
-rw-r--r--glusterfsd/src/Makefile.am12
-rw-r--r--glusterfsd/src/glusterfsd-mem-types.h22
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c873
-rw-r--r--glusterfsd/src/glusterfsd.c842
-rw-r--r--glusterfsd/src/glusterfsd.h44
-rw-r--r--libgfchangelog.pc.in11
-rw-r--r--libglusterfs/src/Makefile.am38
-rw-r--r--libglusterfs/src/call-stub.c3461
-rw-r--r--libglusterfs/src/call-stub.h745
-rw-r--r--libglusterfs/src/circ-buff.c60
-rw-r--r--libglusterfs/src/circ-buff.h5
-rw-r--r--libglusterfs/src/client_t.c890
-rw-r--r--libglusterfs/src/client_t.h135
-rw-r--r--libglusterfs/src/common-utils.c932
-rw-r--r--libglusterfs/src/common-utils.h150
-rw-r--r--libglusterfs/src/compat.c28
-rw-r--r--libglusterfs/src/compat.h11
-rw-r--r--libglusterfs/src/ctx.c48
-rw-r--r--libglusterfs/src/defaults.c288
-rw-r--r--libglusterfs/src/defaults.h50
-rw-r--r--libglusterfs/src/dict.c325
-rw-r--r--libglusterfs/src/dict.h49
-rw-r--r--libglusterfs/src/event-epoll.c463
-rw-r--r--libglusterfs/src/event-history.c6
-rw-r--r--libglusterfs/src/event-history.h3
-rw-r--r--libglusterfs/src/event-poll.c451
-rw-r--r--libglusterfs/src/event.c840
-rw-r--r--libglusterfs/src/event.h37
-rw-r--r--libglusterfs/src/fd-lk.c28
-rw-r--r--libglusterfs/src/fd-lk.h4
-rw-r--r--libglusterfs/src/fd.c86
-rw-r--r--libglusterfs/src/fd.h32
-rw-r--r--libglusterfs/src/gf-dirent.h3
-rw-r--r--libglusterfs/src/gidcache.c192
-rw-r--r--libglusterfs/src/gidcache.h53
-rw-r--r--libglusterfs/src/globals.c278
-rw-r--r--libglusterfs/src/globals.h35
-rw-r--r--libglusterfs/src/glusterfs-acl.h81
-rw-r--r--libglusterfs/src/glusterfs.h208
-rw-r--r--libglusterfs/src/graph-print.c32
-rw-r--r--libglusterfs/src/graph.c263
-rw-r--r--libglusterfs/src/graph.l24
-rw-r--r--libglusterfs/src/graph.y166
-rw-r--r--libglusterfs/src/iatt.h18
-rw-r--r--libglusterfs/src/inode.c324
-rw-r--r--libglusterfs/src/inode.h67
-rw-r--r--libglusterfs/src/iobuf.c26
-rw-r--r--libglusterfs/src/iobuf.h2
-rw-r--r--libglusterfs/src/latency.c109
-rw-r--r--libglusterfs/src/latency.h3
-rw-r--r--libglusterfs/src/list.h40
-rw-r--r--libglusterfs/src/logging.c809
-rw-r--r--libglusterfs/src/logging.h101
-rw-r--r--libglusterfs/src/mem-pool.c77
-rw-r--r--libglusterfs/src/mem-pool.h37
-rw-r--r--libglusterfs/src/mem-types.h20
-rw-r--r--libglusterfs/src/options.c273
-rw-r--r--libglusterfs/src/options.h39
-rw-r--r--libglusterfs/src/rbthash.c7
-rw-r--r--libglusterfs/src/run.c31
-rw-r--r--libglusterfs/src/run.h6
-rw-r--r--libglusterfs/src/scheduler.c82
-rw-r--r--libglusterfs/src/scheduler.h32
-rw-r--r--libglusterfs/src/stack.c108
-rw-r--r--libglusterfs/src/stack.h113
-rw-r--r--libglusterfs/src/statedump.c211
-rw-r--r--libglusterfs/src/statedump.h11
-rw-r--r--libglusterfs/src/store.c709
-rw-r--r--libglusterfs/src/store.h112
-rw-r--r--libglusterfs/src/syncop.c1127
-rw-r--r--libglusterfs/src/syncop.h251
-rw-r--r--libglusterfs/src/syscall.c23
-rw-r--r--libglusterfs/src/syscall.h2
-rw-r--r--libglusterfs/src/timer.c27
-rw-r--r--libglusterfs/src/timer.h4
-rw-r--r--libglusterfs/src/timespec.c68
-rw-r--r--libglusterfs/src/timespec.h24
-rw-r--r--libglusterfs/src/trie.c6
-rw-r--r--libglusterfs/src/xlator.c200
-rw-r--r--libglusterfs/src/xlator.h91
-rw-r--r--libglusterfsclient/src/Makefile.am16
-rw-r--r--libglusterfsclient/src/libglusterfsclient-dentry.c404
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient-internals.h289
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.c8160
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.h1363
-rw-r--r--mod_glusterfs/Makefile.am3
-rw-r--r--mod_glusterfs/apache/1.3/src/Makefile.am30
-rw-r--r--mod_glusterfs/apache/1.3/src/README.txt107
-rw-r--r--mod_glusterfs/apache/1.3/src/mod_glusterfs.c507
-rw-r--r--mod_glusterfs/apache/2.2/src/Makefile.am31
-rw-r--r--mod_glusterfs/apache/2.2/src/README.txt105
-rw-r--r--mod_glusterfs/apache/2.2/src/mod_glusterfs.c3627
-rw-r--r--mod_glusterfs/apache/Makefile.am10
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.4/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.c1820
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.h32
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.5/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.c1476
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.h29
-rw-r--r--mod_glusterfs/lighttpd/Makefile.am3
-rwxr-xr-xrfc.sh9
-rw-r--r--rpc/rpc-lib/src/Makefile.am13
-rw-r--r--rpc/rpc-lib/src/auth-glusterfs.c44
-rw-r--r--rpc/rpc-lib/src/auth-unix.c1
-rw-r--r--rpc/rpc-lib/src/protocol-common.h56
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c197
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h13
-rw-r--r--rpc/rpc-lib/src/rpc-drc.c872
-rw-r--r--rpc/rpc-lib/src/rpc-drc.h104
-rw-r--r--rpc/rpc-lib/src/rpc-transport.c135
-rw-r--r--rpc/rpc-lib/src/rpc-transport.h22
-rw-r--r--rpc/rpc-lib/src/rpcsvc-auth.c137
-rw-r--r--rpc/rpc-lib/src/rpcsvc-common.h65
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c910
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h88
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.c2
-rw-r--r--rpc/rpc-transport/rdma/src/Makefile.am10
-rw-r--r--rpc/rpc-transport/rdma/src/name.c80
-rw-r--r--rpc/rpc-transport/rdma/src/name.h9
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c2887
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.h105
-rw-r--r--rpc/rpc-transport/socket/src/Makefile.am10
-rw-r--r--rpc/rpc-transport/socket/src/name.c56
-rw-r--r--rpc/rpc-transport/socket/src/socket.c1829
-rw-r--r--rpc/rpc-transport/socket/src/socket.h107
-rw-r--r--rpc/xdr/src/Makefile.am9
-rw-r--r--rpc/xdr/src/acl.x48
-rw-r--r--rpc/xdr/src/acl3-xdr.c94
-rw-r--r--rpc/xdr/src/acl3-xdr.h107
-rw-r--r--rpc/xdr/src/cli1-xdr.c120
-rw-r--r--rpc/xdr/src/cli1-xdr.h106
-rw-r--r--rpc/xdr/src/cli1-xdr.x60
-rw-r--r--rpc/xdr/src/glusterd1-xdr.c449
-rw-r--r--rpc/xdr/src/glusterd1-xdr.h182
-rw-r--r--rpc/xdr/src/glusterd1-xdr.x91
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.c216
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.h171
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.x82
-rw-r--r--rpc/xdr/src/glusterfs3.h24
-rw-r--r--rpc/xdr/src/mount3udp.x25
-rw-r--r--rpc/xdr/src/msg-nfs3.c48
-rw-r--r--rpc/xdr/src/msg-nfs3.h32
-rw-r--r--rpc/xdr/src/nlm4-xdr.c19
-rw-r--r--rpc/xdr/src/nlm4-xdr.h19
-rw-r--r--rpc/xdr/src/nlm4.x19
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.c19
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.h19
-rw-r--r--rpc/xdr/src/nlmcbk.x19
-rw-r--r--rpc/xdr/src/nsm-xdr.c19
-rw-r--r--rpc/xdr/src/portmap-xdr.c19
-rw-r--r--rpc/xdr/src/portmap-xdr.h19
-rw-r--r--rpc/xdr/src/rpc-common-xdr.c19
-rw-r--r--rpc/xdr/src/rpc-common-xdr.h19
-rw-r--r--rpc/xdr/src/xdr-generic.c21
-rw-r--r--rpc/xdr/src/xdr-generic.h20
-rw-r--r--rpc/xdr/src/xdr-nfs3.c25
-rw-r--r--rpc/xdr/src/xdr-nfs3.h21
-rwxr-xr-xrun-tests.sh30
-rw-r--r--scheduler/Makefile.am3
-rw-r--r--scheduler/alu/src/Makefile.am14
-rw-r--r--scheduler/alu/src/alu-mem-types.h35
-rw-r--r--scheduler/alu/src/alu.c1019
-rw-r--r--scheduler/alu/src/alu.h89
-rw-r--r--scheduler/nufa/src/Makefile.am12
-rw-r--r--scheduler/nufa/src/nufa-mem-types.h33
-rw-r--r--scheduler/nufa/src/nufa.c429
-rw-r--r--scheduler/random/src/Makefile.am14
-rw-r--r--scheduler/random/src/random-mem-types.h32
-rw-r--r--scheduler/random/src/random.c305
-rw-r--r--scheduler/random/src/random.h46
-rw-r--r--scheduler/rr/Makefile.am3
-rw-r--r--scheduler/rr/src/Makefile.am13
-rw-r--r--scheduler/rr/src/rr-mem-types.h32
-rw-r--r--scheduler/rr/src/rr-options.c256
-rw-r--r--scheduler/rr/src/rr-options.h34
-rw-r--r--scheduler/rr/src/rr.c567
-rw-r--r--scheduler/rr/src/rr.h70
-rw-r--r--scheduler/switch/Makefile.am3
-rw-r--r--scheduler/switch/src/Makefile.am12
-rw-r--r--scheduler/switch/src/switch-mem-types.h33
-rw-r--r--scheduler/switch/src/switch.c451
-rwxr-xr-xsmoke.sh83
-rw-r--r--swift/1.4.8/README22
-rw-r--r--swift/1.4.8/gluster-swift-plugin.spec60
-rw-r--r--swift/1.4.8/gluster-swift.spec396
-rw-r--r--swift/1.4.8/plugins/DiskDir.py484
-rw-r--r--swift/1.4.8/plugins/DiskFile.py316
-rw-r--r--swift/1.4.8/plugins/Glusterfs.py103
-rw-r--r--swift/1.4.8/plugins/__init__.py16
-rw-r--r--swift/1.4.8/plugins/conf/account-server/1.conf22
-rw-r--r--swift/1.4.8/plugins/conf/account.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/account.ring.gzbin739 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/container-server/1.conf24
-rw-r--r--swift/1.4.8/plugins/conf/container.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/container.ring.gzbin741 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/db_file.db0
-rw-r--r--swift/1.4.8/plugins/conf/fs.conf8
-rw-r--r--swift/1.4.8/plugins/conf/object-server/1.conf22
-rw-r--r--swift/1.4.8/plugins/conf/object.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/object.ring.gzbin738 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/proxy-server.conf21
-rw-r--r--swift/1.4.8/plugins/conf/swift.conf7
-rw-r--r--swift/1.4.8/plugins/constraints.py97
-rw-r--r--swift/1.4.8/plugins/utils.py680
-rw-r--r--swift/1.4.8/swift.diff798
-rw-r--r--tests/README.md27
-rw-r--r--tests/afr.rc15
-rwxr-xr-xtests/basic/bd.t131
-rwxr-xr-xtests/basic/cdc.t135
-rwxr-xr-xtests/basic/file-snapshot.t56
-rwxr-xr-xtests/basic/mgmt_v3-locks.t121
-rwxr-xr-xtests/basic/mount.t78
-rw-r--r--tests/basic/nufa.t32
-rwxr-xr-xtests/basic/posixonly.t30
-rw-r--r--tests/basic/pump.t44
-rwxr-xr-xtests/basic/quota.t51
-rwxr-xr-xtests/basic/rpm.t109
-rw-r--r--tests/basic/self-heald.t48
-rwxr-xr-xtests/basic/volume-snapshot.t83
-rw-r--r--tests/basic/volume-status.t66
-rwxr-xr-xtests/basic/volume.t34
-rwxr-xr-xtests/bugs/859927/repl.t69
-rw-r--r--tests/bugs/886998/strict-readdir.t52
-rw-r--r--tests/bugs/949327.t23
-rwxr-xr-xtests/bugs/bug-000000.t9
-rw-r--r--tests/bugs/bug-1002207.t54
-rwxr-xr-xtests/bugs/bug-1002556.t25
-rw-r--r--tests/bugs/bug-1004218.t26
-rw-r--r--tests/bugs/bug-1004744.t48
-rwxr-xr-xtests/bugs/bug-1015990-rep.t81
-rwxr-xr-xtests/bugs/bug-1015990.t95
-rwxr-xr-xtests/bugs/bug-1022055.t26
-rw-r--r--tests/bugs/bug-1022905.t39
-rw-r--r--tests/bugs/bug-1030208.t35
-rw-r--r--tests/bugs/bug-1040934.t37
-rw-r--r--tests/bugs/bug-1045333.t48
-rwxr-xr-xtests/bugs/bug-1049834.t40
-rwxr-xr-xtests/bugs/bug-1064768.t20
-rwxr-xr-xtests/bugs/bug-762989.t32
-rw-r--r--tests/bugs/bug-764638.t13
-rwxr-xr-xtests/bugs/bug-765230.t60
-rw-r--r--tests/bugs/bug-765380.t39
-rwxr-xr-xtests/bugs/bug-765473.t33
-rw-r--r--tests/bugs/bug-765564.t83
-rwxr-xr-xtests/bugs/bug-767095.t51
-rwxr-xr-xtests/bugs/bug-767585-gfid.t43
-rwxr-xr-xtests/bugs/bug-770655.t168
-rwxr-xr-xtests/bugs/bug-782095.t48
-rwxr-xr-xtests/bugs/bug-797171.t43
-rwxr-xr-xtests/bugs/bug-802417.t108
-rwxr-xr-xtests/bugs/bug-808400-dist.t31
-rw-r--r--tests/bugs/bug-808400-fcntl.c113
-rw-r--r--tests/bugs/bug-808400-flock.c92
-rwxr-xr-xtests/bugs/bug-808400-repl.t30
-rwxr-xr-xtests/bugs/bug-808400-stripe.t31
-rwxr-xr-xtests/bugs/bug-808400.t34
-rwxr-xr-xtests/bugs/bug-811493.t18
-rw-r--r--tests/bugs/bug-821056.t52
-rwxr-xr-xtests/bugs/bug-822830.t44
-rwxr-xr-xtests/bugs/bug-823081.t40
-rw-r--r--tests/bugs/bug-824753-file-locker.c42
-rwxr-xr-xtests/bugs/bug-824753.t45
-rwxr-xr-xtests/bugs/bug-830665.t106
-rw-r--r--tests/bugs/bug-834465.c61
-rwxr-xr-xtests/bugs/bug-834465.t44
-rw-r--r--tests/bugs/bug-839595.t31
-rwxr-xr-xtests/bugs/bug-844688.t37
-rw-r--r--tests/bugs/bug-845213.t19
-rw-r--r--tests/bugs/bug-846240.t58
-rwxr-xr-xtests/bugs/bug-847622.t25
-rwxr-xr-xtests/bugs/bug-847624.t23
-rw-r--r--tests/bugs/bug-848251.t50
-rwxr-xr-xtests/bugs/bug-852147.t85
-rwxr-xr-xtests/bugs/bug-853258.t45
-rwxr-xr-xtests/bugs/bug-853680.t52
-rwxr-xr-xtests/bugs/bug-853690.t94
-rw-r--r--tests/bugs/bug-856455.t42
-rw-r--r--tests/bugs/bug-857330/common.rc55
-rwxr-xr-xtests/bugs/bug-857330/normal.t78
-rwxr-xr-xtests/bugs/bug-857330/xml.t101
-rwxr-xr-xtests/bugs/bug-858215.t81
-rw-r--r--tests/bugs/bug-858242.c77
-rwxr-xr-xtests/bugs/bug-858242.t28
-rw-r--r--tests/bugs/bug-858488-min-free-disk.t114
-rwxr-xr-xtests/bugs/bug-859927.t70
-rw-r--r--tests/bugs/bug-860297.t13
-rw-r--r--tests/bugs/bug-860663.t51
-rw-r--r--tests/bugs/bug-861015-index.t36
-rw-r--r--tests/bugs/bug-861015-log.t29
-rwxr-xr-xtests/bugs/bug-861542.t51
-rwxr-xr-xtests/bugs/bug-862834.t46
-rw-r--r--tests/bugs/bug-862967.t59
-rw-r--r--tests/bugs/bug-863068.t76
-rwxr-xr-xtests/bugs/bug-864222.t26
-rwxr-xr-xtests/bugs/bug-865825.t76
-rw-r--r--tests/bugs/bug-866459.t44
-rw-r--r--tests/bugs/bug-867252.t41
-rw-r--r--tests/bugs/bug-867253.t59
-rw-r--r--tests/bugs/bug-869724.t37
-rwxr-xr-xtests/bugs/bug-872923.t57
-rwxr-xr-xtests/bugs/bug-873367.t41
-rw-r--r--tests/bugs/bug-873549.t17
-rw-r--r--tests/bugs/bug-873962-spb.t39
-rwxr-xr-xtests/bugs/bug-873962.t108
-rw-r--r--tests/bugs/bug-874498.t61
-rwxr-xr-xtests/bugs/bug-877293.t41
-rwxr-xr-xtests/bugs/bug-877885.t35
-rwxr-xr-xtests/bugs/bug-877992.t61
-rw-r--r--tests/bugs/bug-878004.t29
-rwxr-xr-xtests/bugs/bug-879490.t37
-rwxr-xr-xtests/bugs/bug-879494.t37
-rw-r--r--tests/bugs/bug-880898.t23
-rwxr-xr-xtests/bugs/bug-882278.t72
-rw-r--r--tests/bugs/bug-884328.t12
-rw-r--r--tests/bugs/bug-884452.t46
-rwxr-xr-xtests/bugs/bug-884455.t84
-rwxr-xr-xtests/bugs/bug-884597.t152
-rw-r--r--tests/bugs/bug-886998.t52
-rw-r--r--tests/bugs/bug-887098-gmount-crash.t48
-rwxr-xr-xtests/bugs/bug-887145.t89
-rw-r--r--tests/bugs/bug-888174.t65
-rw-r--r--tests/bugs/bug-888752.t24
-rwxr-xr-xtests/bugs/bug-889630.t56
-rw-r--r--tests/bugs/bug-889996.t19
-rwxr-xr-xtests/bugs/bug-892730.t76
-rw-r--r--tests/bugs/bug-893338.t34
-rwxr-xr-xtests/bugs/bug-893378.t73
-rw-r--r--tests/bugs/bug-895235.t23
-rwxr-xr-xtests/bugs/bug-896431.t124
-rwxr-xr-xtests/bugs/bug-902610.t59
-rw-r--r--tests/bugs/bug-903336.t13
-rwxr-xr-xtests/bugs/bug-904065.t90
-rwxr-xr-xtests/bugs/bug-904300.t61
-rw-r--r--tests/bugs/bug-905307.t36
-rw-r--r--tests/bugs/bug-905864.c82
-rw-r--r--tests/bugs/bug-905864.t32
-rw-r--r--tests/bugs/bug-906646.t93
-rwxr-xr-xtests/bugs/bug-907072.t46
-rwxr-xr-xtests/bugs/bug-908146.t39
-rwxr-xr-xtests/bugs/bug-912297.t44
-rwxr-xr-xtests/bugs/bug-912564.t92
-rw-r--r--tests/bugs/bug-913051.t65
-rw-r--r--tests/bugs/bug-913487.t14
-rw-r--r--tests/bugs/bug-913544.t24
-rwxr-xr-xtests/bugs/bug-913555.t54
-rwxr-xr-xtests/bugs/bug-915280.t51
-rwxr-xr-xtests/bugs/bug-915554.t75
-rw-r--r--tests/bugs/bug-916226.t26
-rwxr-xr-xtests/bugs/bug-916549.t19
-rw-r--r--tests/bugs/bug-918437-sh-mtime.t52
-rwxr-xr-xtests/bugs/bug-921072.t118
-rw-r--r--tests/bugs/bug-921231.t31
-rwxr-xr-xtests/bugs/bug-921408.t89
-rwxr-xr-xtests/bugs/bug-924075.t23
-rwxr-xr-xtests/bugs/bug-924265.t35
-rwxr-xr-xtests/bugs/bug-927616.t61
-rw-r--r--tests/bugs/bug-948686.t46
-rw-r--r--tests/bugs/bug-948729/bug-948729-force.t84
-rw-r--r--tests/bugs/bug-948729/bug-948729-mode-script.t85
-rw-r--r--tests/bugs/bug-948729/bug-948729.t67
-rw-r--r--tests/bugs/bug-949242.t54
-rw-r--r--tests/bugs/bug-949298.t12
-rw-r--r--tests/bugs/bug-949930.t27
-rwxr-xr-xtests/bugs/bug-955588.t27
-rw-r--r--tests/bugs/bug-957877.t31
-rw-r--r--tests/bugs/bug-958691.t50
-rw-r--r--tests/bugs/bug-958790.t21
-rw-r--r--tests/bugs/bug-961307.t32
-rw-r--r--tests/bugs/bug-961615.t34
-rw-r--r--tests/bugs/bug-961669.t48
-rwxr-xr-xtests/bugs/bug-963541.t33
-rw-r--r--tests/bugs/bug-963678.t56
-rwxr-xr-xtests/bugs/bug-964059.t30
-rw-r--r--tests/bugs/bug-966018.t34
-rwxr-xr-xtests/bugs/bug-969193.t13
-rwxr-xr-xtests/bugs/bug-970070.t14
-rwxr-xr-xtests/bugs/bug-973073.t48
-rw-r--r--tests/bugs/bug-974007.t52
-rwxr-xr-xtests/bugs/bug-974972.t36
-rw-r--r--tests/bugs/bug-976800.t28
-rw-r--r--tests/bugs/bug-977246.t21
-rwxr-xr-xtests/bugs/bug-977797.t114
-rw-r--r--tests/bugs/bug-978794.t29
-rwxr-xr-xtests/bugs/bug-979365.t47
-rw-r--r--tests/bugs/bug-982174.t36
-rwxr-xr-xtests/bugs/bug-983477.t52
-rw-r--r--tests/bugs/bug-985074.t55
-rw-r--r--tests/bugs/bug-986429.t19
-rwxr-xr-xtests/bugs/bug-986905.t27
-rw-r--r--tests/bugs/bug-991622.t35
-rw-r--r--tests/bugs/getlk_owner.c120
-rwxr-xr-xtests/bugs/overlap.py59
-rwxr-xr-xtests/cluster.rc112
-rw-r--r--tests/dht.rc79
-rw-r--r--tests/fallocate.rc19
-rwxr-xr-xtests/features/glupy.t29
-rwxr-xr-xtests/features/readdir-ahead.t44
-rw-r--r--tests/fileio.rc61
-rw-r--r--tests/include.rc248
-rw-r--r--tests/nfs.rc21
-rwxr-xr-xtests/performance/open-behind.t63
-rw-r--r--tests/performance/quick-read.t55
-rwxr-xr-xtests/snapshot.rc251
-rwxr-xr-xtests/utils/create-files.py207
-rw-r--r--tests/volume.rc325
-rw-r--r--xlators/Makefile.am3
-rw-r--r--xlators/bindings/python/src/Makefile.am2
-rw-r--r--xlators/bindings/python/src/gluster.py23
-rw-r--r--xlators/bindings/python/src/glusterstack.py23
-rw-r--r--xlators/bindings/python/src/glustertypes.py23
-rw-r--r--xlators/bindings/python/src/python.c30
-rw-r--r--xlators/bindings/python/src/testxlator.py25
-rw-r--r--xlators/cluster/afr/src/Makefile.am10
-rw-r--r--xlators/cluster/afr/src/afr-common.c1228
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c294
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c863
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c650
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c1545
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h11
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c1033
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h4
-rw-r--r--xlators/cluster/afr/src/afr-open.c197
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c75
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c836
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h36
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c715
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c207
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c407
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h10
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c835
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h20
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c862
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h17
-rw-r--r--xlators/cluster/afr/src/afr.c217
-rw-r--r--xlators/cluster/afr/src/afr.h378
-rw-r--r--xlators/cluster/afr/src/pump.c37
-rw-r--r--xlators/cluster/dht/src/Makefile.am13
-rw-r--r--xlators/cluster/dht/src/dht-common.c1006
-rw-r--r--xlators/cluster/dht/src/dht-common.h152
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c179
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c77
-rw-r--r--xlators/cluster/dht/src/dht-helper.c432
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c84
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c445
-rw-r--r--xlators/cluster/dht/src/dht-layout.c159
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c144
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h2
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c316
-rw-r--r--xlators/cluster/dht/src/dht-rename.c112
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c466
-rw-r--r--xlators/cluster/dht/src/dht-shared.c758
-rw-r--r--xlators/cluster/dht/src/dht.c544
-rw-r--r--xlators/cluster/dht/src/nufa.c341
-rw-r--r--xlators/cluster/dht/src/switch.c257
-rw-r--r--xlators/cluster/ha/src/Makefile.am7
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c24
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h21
-rw-r--r--xlators/cluster/ha/src/ha.c32
-rw-r--r--xlators/cluster/ha/src/ha.h24
-rw-r--r--xlators/cluster/map/src/Makefile.am7
-rw-r--r--xlators/cluster/map/src/map-helper.c24
-rw-r--r--xlators/cluster/map/src/map-mem-types.h21
-rw-r--r--xlators/cluster/map/src/map.c27
-rw-r--r--xlators/cluster/map/src/map.h22
-rw-r--r--xlators/cluster/stripe/src/Makefile.am7
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c196
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h3
-rw-r--r--xlators/cluster/stripe/src/stripe.c1330
-rw-r--r--xlators/cluster/stripe/src/stripe.h71
-rw-r--r--xlators/cluster/unify/Makefile.am3
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/error-gen/src/Makefile.am7
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h20
-rw-r--r--xlators/debug/error-gen/src/error-gen.c260
-rw-r--r--xlators/debug/error-gen/src/error-gen.h36
-rw-r--r--xlators/debug/io-stats/src/Makefile.am7
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h21
-rw-r--r--xlators/debug/io-stats/src/io-stats.c275
-rw-r--r--xlators/debug/trace/src/Makefile.am8
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h21
-rw-r--r--xlators/debug/trace/src/trace.c2881
-rw-r--r--xlators/debug/trace/src/trace.h98
-rw-r--r--xlators/encryption/Makefile.am2
-rw-r--r--xlators/encryption/crypt/Makefile.am (renamed from libglusterfsclient/Makefile.am)0
-rw-r--r--xlators/encryption/crypt/src/Makefile.am24
-rw-r--r--xlators/encryption/crypt/src/atom.c962
-rw-r--r--xlators/encryption/crypt/src/crypt-common.h141
-rw-r--r--xlators/encryption/crypt/src/crypt-mem-types.h43
-rw-r--r--xlators/encryption/crypt/src/crypt.c4498
-rw-r--r--xlators/encryption/crypt/src/crypt.h899
-rw-r--r--xlators/encryption/crypt/src/data.c769
-rw-r--r--xlators/encryption/crypt/src/keys.c302
-rw-r--r--xlators/encryption/crypt/src/metadata.c605
-rw-r--r--xlators/encryption/crypt/src/metadata.h74
-rw-r--r--xlators/encryption/rot-13/src/Makefile.am7
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c43
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h20
-rw-r--r--xlators/features/Makefile.am3
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am (renamed from scheduler/nufa/Makefile.am)2
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c87
-rw-r--r--xlators/features/changelog/lib/examples/python/changes.py32
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py64
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am37
-rw-r--r--xlators/features/changelog/lib/src/changelog.h31
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c180
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h97
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-process.c571
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c515
-rw-r--r--xlators/features/changelog/src/Makefile.am19
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c176
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h46
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c693
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h395
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h29
-rw-r--r--xlators/features/changelog/src/changelog-misc.h101
-rw-r--r--xlators/features/changelog/src/changelog-notifier.c314
-rw-r--r--xlators/features/changelog/src/changelog-notifier.h19
-rw-r--r--xlators/features/changelog/src/changelog-rt.c72
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c1477
-rw-r--r--xlators/features/compress/Makefile.am (renamed from mod_glusterfs/apache/2.2/Makefile.am)2
-rw-r--r--xlators/features/compress/src/Makefile.am17
-rw-r--r--xlators/features/compress/src/cdc-helper.c547
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h22
-rw-r--r--xlators/features/compress/src/cdc.c342
-rw-r--r--xlators/features/compress/src/cdc.h107
-rw-r--r--xlators/features/filter/src/Makefile.am7
-rw-r--r--xlators/features/filter/src/filter-mem-types.h20
-rw-r--r--xlators/features/filter/src/filter.c24
-rw-r--r--xlators/features/gfid-access/Makefile.am1
-rw-r--r--xlators/features/gfid-access/src/Makefile.am15
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h23
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1172
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h128
-rw-r--r--xlators/features/glupy/Makefile.am (renamed from scheduler/random/Makefile.am)2
-rw-r--r--xlators/features/glupy/doc/README.md44
-rw-r--r--xlators/features/glupy/doc/TESTING9
-rw-r--r--xlators/features/glupy/doc/test.vol10
-rw-r--r--xlators/features/glupy/src/Makefile.am20
-rw-r--r--xlators/features/glupy/src/debug-trace.py774
-rw-r--r--xlators/features/glupy/src/glupy.c2470
-rw-r--r--xlators/features/glupy/src/glupy.h69
-rw-r--r--xlators/features/glupy/src/gluster.py841
-rw-r--r--xlators/features/glupy/src/helloworld.py19
-rw-r--r--xlators/features/glupy/src/negative.py92
-rw-r--r--xlators/features/index/src/Makefile.am8
-rw-r--r--xlators/features/index/src/index-mem-types.h22
-rw-r--r--xlators/features/index/src/index.c403
-rw-r--r--xlators/features/index/src/index.h37
-rw-r--r--xlators/features/locks/src/Makefile.am9
-rw-r--r--xlators/features/locks/src/clear.c32
-rw-r--r--xlators/features/locks/src/clear.h22
-rw-r--r--xlators/features/locks/src/common.c216
-rw-r--r--xlators/features/locks/src/common.h81
-rw-r--r--xlators/features/locks/src/entrylk.c173
-rw-r--r--xlators/features/locks/src/inodelk.c245
-rw-r--r--xlators/features/locks/src/locks-mem-types.h20
-rw-r--r--xlators/features/locks/src/locks.h56
-rw-r--r--xlators/features/locks/src/posix.c797
-rw-r--r--xlators/features/locks/src/reservelk.c24
-rw-r--r--xlators/features/locks/tests/unit-test.c22
-rw-r--r--xlators/features/mac-compat/src/Makefile.am7
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c27
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am8
-rw-r--r--xlators/features/marker/src/marker-common.c37
-rw-r--r--xlators/features/marker/src/marker-common.h27
-rw-r--r--xlators/features/marker/src/marker-mem-types.h20
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c37
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h25
-rw-r--r--xlators/features/marker/src/marker-quota.c36
-rw-r--r--xlators/features/marker/src/marker-quota.h25
-rw-r--r--xlators/features/marker/src/marker.c325
-rw-r--r--xlators/features/marker/src/marker.h31
-rw-r--r--xlators/features/marker/utils/src/Makefile.am22
-rw-r--r--xlators/features/marker/utils/src/procdiggy.h30
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am6
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py521
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py129
-rw-r--r--xlators/features/path-convertor/src/Makefile.am7
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h20
-rw-r--r--xlators/features/path-convertor/src/path.c27
-rw-r--r--xlators/features/protect/Makefile.am (renamed from mod_glusterfs/apache/1.3/Makefile.am)0
-rw-r--r--xlators/features/protect/src/Makefile.am21
-rw-r--r--xlators/features/protect/src/prot_client.c215
-rw-r--r--xlators/features/protect/src/prot_dht.c168
-rw-r--r--xlators/features/protect/src/prot_server.c51
-rw-r--r--xlators/features/qemu-block/Makefile.am1
-rw-r--r--xlators/features/qemu-block/src/Makefile.am155
-rw-r--r--xlators/features/qemu-block/src/bdrv-xlator.c397
-rw-r--r--xlators/features/qemu-block/src/bh-syncop.c48
-rw-r--r--xlators/features/qemu-block/src/clock-timer.c60
-rw-r--r--xlators/features/qemu-block/src/coroutine-synctask.c116
-rw-r--r--xlators/features/qemu-block/src/monitor-logging.c50
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.c662
-rw-r--r--xlators/features/qemu-block/src/qb-coroutines.h30
-rw-r--r--xlators/features/qemu-block/src/qemu-block-memory-types.h25
-rw-r--r--xlators/features/qemu-block/src/qemu-block.c1140
-rw-r--r--xlators/features/qemu-block/src/qemu-block.h109
-rw-r--r--xlators/features/quiesce/src/Makefile.am7
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h20
-rw-r--r--xlators/features/quiesce/src/quiesce.c47
-rw-r--r--xlators/features/quiesce/src/quiesce.h20
-rw-r--r--xlators/features/quota/src/Makefile.am7
-rw-r--r--xlators/features/quota/src/quota-mem-types.h20
-rw-r--r--xlators/features/quota/src/quota.c238
-rw-r--r--xlators/features/quota/src/quota.h36
-rw-r--r--xlators/features/read-only/src/Makefile.am9
-rw-r--r--xlators/features/read-only/src/read-only-common.c24
-rw-r--r--xlators/features/read-only/src/read-only-common.h24
-rw-r--r--xlators/features/read-only/src/read-only.c22
-rw-r--r--xlators/features/read-only/src/worm.c25
-rw-r--r--xlators/features/trash/src/Makefile.am7
-rw-r--r--xlators/features/trash/src/trash-mem-types.h20
-rw-r--r--xlators/features/trash/src/trash.c75
-rw-r--r--xlators/features/trash/src/trash.h22
-rw-r--r--xlators/lib/src/libxlator.c195
-rw-r--r--xlators/lib/src/libxlator.h112
-rw-r--r--xlators/meta/src/Makefile.am5
-rw-r--r--xlators/meta/src/meta-mem-types.h20
-rw-r--r--xlators/meta/src/meta.c20
-rw-r--r--xlators/meta/src/meta.h20
-rw-r--r--xlators/meta/src/misc.c20
-rw-r--r--xlators/meta/src/misc.h20
-rw-r--r--xlators/meta/src/tree.c22
-rw-r--r--xlators/meta/src/tree.h20
-rw-r--r--xlators/meta/src/view.c20
-rw-r--r--xlators/meta/src/view.h20
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c851
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c3360
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c2347
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c1009
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h73
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c637
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c107
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h28
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c924
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c1893
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h24
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c3223
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h79
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c140
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h22
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c96
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c414
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c806
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c1234
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c105
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h59
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c5590
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c2687
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h158
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c1595
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c4803
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h281
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c1150
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h115
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c1081
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c1452
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c736
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h574
-rw-r--r--xlators/mount/fuse/src/Makefile.am7
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c1900
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h262
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c193
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h21
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c233
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in253
-rw-r--r--xlators/nfs/server/src/Makefile.am17
-rw-r--r--xlators/nfs/server/src/acl3.c708
-rw-r--r--xlators/nfs/server/src/acl3.h31
-rw-r--r--xlators/nfs/server/src/mount3.c1079
-rw-r--r--xlators/nfs/server/src/mount3.h39
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c189
-rw-r--r--xlators/nfs/server/src/nfs-common.c69
-rw-r--r--xlators/nfs/server/src/nfs-common.h20
-rw-r--r--xlators/nfs/server/src/nfs-fops.c236
-rw-r--r--xlators/nfs/server/src/nfs-fops.h28
-rw-r--r--xlators/nfs/server/src/nfs-generics.c33
-rw-r--r--xlators/nfs/server/src/nfs-generics.h27
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c27
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h17
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h31
-rw-r--r--xlators/nfs/server/src/nfs.c781
-rw-r--r--xlators/nfs/server/src/nfs.h37
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c183
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h55
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c175
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h23
-rw-r--r--xlators/nfs/server/src/nfs3.c570
-rw-r--r--xlators/nfs/server/src/nfs3.h90
-rw-r--r--xlators/nfs/server/src/nlm4.c428
-rw-r--r--xlators/nfs/server/src/nlm4.h22
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c27
-rw-r--r--xlators/performance/Makefile.am2
-rw-r--r--xlators/performance/io-cache/src/Makefile.am8
-rw-r--r--xlators/performance/io-cache/src/io-cache.c218
-rw-r--r--xlators/performance/io-cache/src/io-cache.h2
-rw-r--r--xlators/performance/io-cache/src/page.c21
-rw-r--r--xlators/performance/io-threads/src/Makefile.am7
-rw-r--r--xlators/performance/io-threads/src/io-threads.c318
-rw-r--r--xlators/performance/io-threads/src/io-threads.h13
-rw-r--r--xlators/performance/md-cache/src/Makefile.am8
-rw-r--r--xlators/performance/md-cache/src/md-cache.c498
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am15
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h21
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1001
-rw-r--r--xlators/performance/quick-read/src/Makefile.am7
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h1
-rw-r--r--xlators/performance/quick-read/src/quick-read.c3727
-rw-r--r--xlators/performance/quick-read/src/quick-read.h57
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c148
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am (renamed from scheduler/alu/Makefile.am)2
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am15
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c560
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h46
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am7
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c3
-rw-r--r--xlators/performance/write-behind/src/Makefile.am7
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h1
-rw-r--r--xlators/performance/write-behind/src/write-behind.c3631
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am16
-rw-r--r--xlators/playground/template/src/template.c49
-rw-r--r--xlators/playground/template/src/template.h24
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am8
-rw-r--r--xlators/protocol/auth/addr/src/addr.c20
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am7
-rw-r--r--xlators/protocol/auth/login/src/login.c20
-rw-r--r--xlators/protocol/client/src/Makefile.am13
-rw-r--r--xlators/protocol/client/src/client-callback.c6
-rw-r--r--xlators/protocol/client/src/client-handshake.c348
-rw-r--r--xlators/protocol/client/src/client-helpers.c77
-rw-r--r--xlators/protocol/client/src/client-lk.c360
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c (renamed from xlators/protocol/client/src/client3_1-fops.c)1536
-rw-r--r--xlators/protocol/client/src/client.c183
-rw-r--r--xlators/protocol/client/src/client.h62
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/Makefile.am3
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/Makefile.am3
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am26
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am19
-rw-r--r--xlators/protocol/server/src/authenticate.c91
-rw-r--r--xlators/protocol/server/src/authenticate.h19
-rw-r--r--xlators/protocol/server/src/server-handshake.c174
-rw-r--r--xlators/protocol/server/src/server-helpers.c1186
-rw-r--r--xlators/protocol/server/src/server-helpers.h74
-rw-r--r--xlators/protocol/server/src/server-mem-types.h19
-rw-r--r--xlators/protocol/server/src/server-resolve.c58
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c (renamed from xlators/protocol/server/src/server3_1-fops.c)3576
-rw-r--r--xlators/protocol/server/src/server.c767
-rw-r--r--xlators/protocol/server/src/server.h129
-rw-r--r--xlators/storage/Makefile.am6
-rw-r--r--xlators/storage/bd/Makefile.am3
-rw-r--r--xlators/storage/bd/src/Makefile.am20
-rw-r--r--xlators/storage/bd/src/bd-aio.c527
-rw-r--r--xlators/storage/bd/src/bd-aio.h41
-rw-r--r--xlators/storage/bd/src/bd-helper.c783
-rw-r--r--xlators/storage/bd/src/bd.c2404
-rw-r--r--xlators/storage/bd/src/bd.h178
-rw-r--r--xlators/storage/bdb/Makefile.am3
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c154
-rw-r--r--xlators/storage/posix/src/posix-handle.h22
-rw-r--r--xlators/storage/posix/src/posix-helpers.c575
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c1596
-rw-r--r--xlators/storage/posix/src/posix.h96
-rw-r--r--xlators/system/posix-acl/src/Makefile.am10
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h40
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c121
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h71
1198 files changed, 196104 insertions, 115014 deletions
diff --git a/.gitignore b/.gitignore
index c5371b264..ff253c1da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,26 +8,41 @@ install-sh
ltmain.sh
Makefile.in
missing
+py-compile
*.sw?
*~
-*lo
-*la
-*o
+*.lo
+*.la
+*.o
+*.tar.gz
+*.rpm
.libs
+.deps
Makefile
stamp-h1
# Generated files
-extras/init.d/glusterfs-server.plist
-extras/init.d/glusterfsd-Debian
-extras/init.d/glusterfsd-Redhat
-extras/init.d/glusterfsd-SuSE
-glusterfs.spec
-libtool
-xlators/mount/fuse/utils/mount.glusterfs
-xlators/mount/fuse/utils/mount_glusterfs
+api/examples/__init__.py*
+api/examples/setup.py
argp-standalone/libargp.a
+contrib/uuid/uuid_types.h
+extras/init.d/glusterd-Debian
+extras/init.d/glusterd-Redhat
+extras/init.d/glusterd-SuSE
+extras/init.d/glusterd.plist
+extras/ocf/glusterd
+extras/ocf/volume
+extras/who-wrote-glusterfs/gitdm
+glusterfs-api.pc
+glusterfs.spec
glusterfsd/src/glusterfsd
+libgfchangelog.pc
libglusterfs/src/spec.lex.c
libglusterfs/src/y.tab.c
libglusterfs/src/y.tab.h
+libtool
+run-tests.sh
+ufo/.tox
+ufo/test/unit/.coverage
+xlators/mount/fuse/utils/mount.glusterfs
+xlators/mount/fuse/utils/mount_glusterfs
diff --git a/.mailmap b/.mailmap
new file mode 100644
index 000000000..6bcd95dea
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,31 @@
+# .mailmap, see 'git shortlog --help' for details
+#
+# Listing of contributors that filed patches with different email addresses.
+# Format: <name> <main-email> <alias> [<alias> ...]
+#
+
+Amar Tumballi <amarts@redhat.com> <amar@gluster.com> <amar@del.gluster.com>
+Anand Avati <avati@redhat.com> <avati@gluster.com> <avati@dev.gluster.com> <avati@amp.gluster.com> <avati@blackhole.gluster.com>
+Anush Shetty <ashetty@redhat.com> <anush@gluster.com>
+Csaba Henk <csaba@redhat.com> <csaba@gluster.com> <csaba@lowlife.hu> <csaba@zresearch.com>
+Harshavardhana <fharshav@redhat.com> <harsha@gluster.com> <harsha@zresearch.com> <harsha@dev.gluster.com> <harsha@harshavardhana.net>
+Kaleb S. KEITHLEY <kkeithle@redhat.com> <kkeithle@f16node1.kkeithle.usersys.redhat.com>
+Kaushal M <kaushal@redhat.com> <kaushal@gluster.com>
+Kaushik BV <kbudiger@redhat.com> <kaushikbv@gluster.com>
+Krishna Srinivas <ksriniva@redhat.com> <krishna@gluster.com> <krishna@zresearch.com> <krishna@guest-laptop>
+Krishnan Parthasarathi <kparthas@redhat.com> <kp@gluster.com>
+Louis Zuckerman <louiszuckerman@gmail.com> <me@louiszuckerman.com>
+M S Vishwanath Bhat <vbhat@redhat.com> <msvbhat@gmail.com> <vishwanath@gluster.com>
+Pavan Sondur <pavan@gluster.com> <pavan@dev.gluster.com>
+Pete Zaitcev <zaitcev@kotori.zaitcev.us> <zaitcev@yahoo.com>
+Pranith Kumar K <pkarampu@redhat.com> <pranithk@gluster.com>
+Raghavendra Bhat <raghavendra@redhat.com> <raghavendrabhat@gluster.com>
+Raghavendra G <rgowdapp@redhat.com> <raghavendra@gluster.com> <raghavendra@zresearch.com>
+Rahul C S <rahulcs@redhat.com> <rahulcssjce@gmail.com>
+Rajesh Amaravathi <rajesh@redhat.com> <rajesh@gluster.com> <rajesh.amaravathi@gmail.com>
+Shehjar Tikoo <shehjart@gluster.com> <shehjart@zresearch.com>
+Venky Shankar <vshankar@redhat.com> <venky@gluster.com>
+Vijay Bellur <vbellur@redhat.com> <vijay@gluster.com> <vijay@dev.gluster.com>
+Vijaykumar Koppad <vkoppad@redhat.com> <vijaykumar.koppad@gmail.com>
+Vikas Gorur <vikas@gluster.com> <vikas@zresearch.com>
+shishir gowda <sgowda@redhat.com> <shishirng@gluster.com>
diff --git a/CONTRIBUTING b/CONTRIBUTING
index 8b3baa7e5..7bccd88d7 100644
--- a/CONTRIBUTING
+++ b/CONTRIBUTING
@@ -22,4 +22,4 @@
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved. \ No newline at end of file
+ this project or the open source license(s) involved.
diff --git a/Makefile.am b/Makefile.am
index 6693bb876..598ebb410 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,8 +1,18 @@
-EXTRA_DIST = autogen.sh COPYING-GPLV2 COPYING-LGPLV3 INSTALL README AUTHORS THANKS NEWS glusterfs.spec
+EXTRA_DIST = autogen.sh \
+ COPYING-GPLV2 COPYING-LGPLV3 \
+ INSTALL README AUTHORS THANKS NEWS \
+ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \
+ error-codes.json gf-error-codes.h.template \
+ gen-headers.py run-tests.sh \
+ $(shell find $(top_srcdir)/tests -type f -print)
-SUBDIRS = argp-standalone libglusterfs rpc xlators glusterfsd $(FUSERMOUNT_SUBDIR) doc extras cli
+SUBDIRS = argp-standalone libglusterfs rpc api xlators glusterfsd \
+ $(FUSERMOUNT_SUBDIR) doc extras cli @SYNCDAEMON_SUBDIR@
-CLEANFILES =
+pkgconfigdir = @pkgconfigdir@
+pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
+
+CLEANFILES =
gitclean: distclean
find . -name Makefile.in -exec rm -f {} \;
@@ -11,4 +21,9 @@ gitclean: distclean
rm -fr autom4te.cache
rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp
rm -fr argp-standalone/autom4te.cache
- rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in argp-standalone/configure argp-standalone/depcomp argp-standalone/install-sh argp-standalone/missing
+ rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in
+ rm -f argp-standalone/configure argp-standalone/depcomp
+ rm -f argp-standalone/install-sh argp-standalone/missing
+
+dist-hook:
+ (cd $(srcdir) && git diff && echo ===== git log ==== && git log) > $(distdir)/ChangeLog
diff --git a/api/Makefile.am b/api/Makefile.am
new file mode 100644
index 000000000..f0ad1ee97
--- /dev/null
+++ b/api/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src examples
diff --git a/api/examples/Makefile.am b/api/examples/Makefile.am
new file mode 100644
index 000000000..05f40ff53
--- /dev/null
+++ b/api/examples/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_PROGRAMS = glfsxmp
+glfsxmp_SOURCES = glfsxmp.c
+glfsxmp_CFLAGS = $(GLFS_CFLAGS) -Wall
+glfsxmp_LDADD = $(GLFS_LIBS) -lrt
+
+EXTRA_DIST = gfapi.py
diff --git a/api/examples/README b/api/examples/README
new file mode 100644
index 000000000..4d2b521f7
--- /dev/null
+++ b/api/examples/README
@@ -0,0 +1,36 @@
+This is an example application which uses libgfapi. It is
+a complete autotools based build system which demonstrates the
+required changes in configure.ac, Makefile.am etc. to successfully
+detect libgfapi and build an application against it.
+
+There are two approaches to building a libgfapi based application:
+
+1. In the presence of pkg-config in your build system.
+This is the recommended approach which is also used in this example.
+For this approach to work, you need to build glusterfs by passing
+--pkgconfigdir=/usr/lib64/pkgconfig (or the appropriate directory)
+in your distro. This already happens if you build RPMs with the
+glusterfs.spec provided in glusterfs.git. You will also need to
+install glusterfs-api RPM.
+
+2. In the absence of pkg-config in your build system.
+Make sure your LDFLAGS includes -L/path/to/lib where libgfapi.so is
+installed and your CFLAGS includes -I/path/to/include/glusterfs where
+the 'api' directory containing the headers is available.
+
+glfsxmp.c
+=========
+
+glfsxmp.c is an example application which uses libgfapi
+
+Compilation Steps For glfsxmp.c
+===============================
+
+1. $./autogen.sh
+2. $./configure
+
+Note: Before running ./configure , as mentioned above, you need to
+ take care of #1 or #2 i.e. pkg-config path or LDFLAGS and
+ -I/<path> with correct values.
+
+3. $make glfsxmp
diff --git a/api/examples/autogen.sh b/api/examples/autogen.sh
new file mode 100755
index 000000000..1fee6be11
--- /dev/null
+++ b/api/examples/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+aclocal
+autoconf
+automake --foreign
diff --git a/api/examples/configure.ac b/api/examples/configure.ac
new file mode 100644
index 000000000..b80177a4e
--- /dev/null
+++ b/api/examples/configure.ac
@@ -0,0 +1,12 @@
+
+dnl Package name, version and bug-report address for the libgfapi example.
+AC_INIT([glfs-test],[0.1],[gluster-devel@nongnu.org])
+
+AM_INIT_AUTOMAKE
+
+AC_CONFIG_FILES([Makefile])
+
+AC_PROG_CC
+
+PKG_CHECK_MODULES([GLFS], [glusterfs-api >= 3])
+
+AC_OUTPUT
diff --git a/api/examples/gfapi.py b/api/examples/gfapi.py
new file mode 100755
index 000000000..3ac67f4d5
--- /dev/null
+++ b/api/examples/gfapi.py
@@ -0,0 +1,422 @@
+#!/usr/bin/python
+
+from ctypes import *
+from ctypes.util import find_library
+import os
+import sys
+import time
+import types
+
+# Looks like ctypes is having trouble with dependencies, so just force them to
+# load with RTLD_GLOBAL until I figure that out.
+glfs = CDLL(find_library("glusterfs"),RTLD_GLOBAL)
+xdr = CDLL(find_library("gfxdr"),RTLD_GLOBAL)
+api = CDLL(find_library("gfapi"),RTLD_GLOBAL)
+
+# Wow, the Linux kernel folks really play nasty games with this structure. If
+# you look at the man page for stat(2) and then at this definition you'll note
+# two discrepancies. First, we seem to have st_nlink and st_mode reversed. In
+# fact that's exactly how they're defined *for 64-bit systems*; for 32-bit
+# they're in the man-page order. Even uglier, the man page makes no mention of
+# the *nsec fields, but they are very much present and if they're not included
+# then we get memory corruption because libgfapi has a structure definition
+# that's longer than ours and they overwrite some random bit of memory after
+# the space we allocated. Yes, that's all very disgusting, and I'm still not
+# sure this will really work on 32-bit because all of the field types are so
+# obfuscated behind macros and feature checks.
+class Stat (Structure):
+ _fields_ = [
+ ("st_dev", c_ulong),
+ ("st_ino", c_ulong),
+ ("st_nlink", c_ulong),
+ ("st_mode", c_uint),
+ ("st_uid", c_uint),
+ ("st_gid", c_uint),
+ ("st_rdev", c_ulong),
+ ("st_size", c_ulong),
+ ("st_blksize", c_ulong),
+ ("st_blocks", c_ulong),
+ ("st_atime", c_ulong),
+ ("st_atimensec", c_ulong),
+ ("st_mtime", c_ulong),
+ ("st_mtimensec", c_ulong),
+ ("st_ctime", c_ulong),
+ ("st_ctimensec", c_ulong),
+ ]
+api.glfs_creat.restype = c_void_p
+api.glfs_open.restype = c_void_p
+api.glfs_lstat.restype = c_int
+api.glfs_lstat.argtypes = [c_void_p, c_char_p, POINTER(Stat)]
+
+class Dirent (Structure):
+ _fields_ = [
+ ("d_ino", c_ulong),
+ ("d_off", c_ulong),
+ ("d_reclen", c_ushort),
+ ("d_type", c_char),
+ ("d_name", c_char * 256),
+ ]
+api.glfs_opendir.restype = c_void_p
+api.glfs_readdir_r.restype = c_int
+api.glfs_readdir_r.argtypes = [c_void_p, POINTER(Dirent),
+ POINTER(POINTER(Dirent))]
+
+# There's a bit of ctypes glitchiness around __del__ functions and module-level
+# variables. If we unload the module while we still have references to File or
+# Volume objects, the module-level variables might have disappeared by the time
+# __del__ gets called. Therefore the objects hold references which they
+# release when __del__ is done. We only actually use the object-local values
+# in __del__; for clarity, we just use the simpler module-level form elsewhere.
+
+class File(object):
+
+ def __init__ (self, fd):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fd = fd
+
+ def __del__ (self):
+ self._api.glfs_close(self.fd)
+ self._api = None
+
+ # File operations, in alphabetical order.
+
+ def fsync (self):
+ return api.glfs_fsync(self.fd)
+
+ def read (self, buflen, flags=0):
+ rbuf = create_string_buffer(buflen)
+ rc = api.glfs_read(self.fd,rbuf,buflen,flags)
+ if rc > 0:
+ return rbuf.value[:rc]
+ else:
+ return rc
+
+ def read_buffer (self, buf, flags=0):
+ return api.glfs_read(self.fd,buf,len(buf),flags)
+
+ def write (self, data, flags=0):
+ return api.glfs_write(self.fd,data,len(data),flags)
+
+ def fallocate (self, mode, offset, len):
+ return api.glfs_fallocate(self.fd, mode, offset, len)
+
+ def discard (self, offset, len):
+ return api.glfs_discard(self.fd, offset, len)
+
+
+class Dir(object):
+
+ def __init__ (self, fd):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fd = fd
+ self.cursor = POINTER(Dirent)()
+
+ def __del__ (self):
+ self._api.glfs_closedir(self.fd)
+ self._api = None
+
+ def next (self):
+ entry = Dirent()
+ entry.d_reclen = 256
+ rc = api.glfs_readdir_r(self.fd,byref(entry),byref(self.cursor))
+ if (rc < 0) or (not self.cursor) or (not self.cursor.contents):
+ return rc
+ return entry
+
+class Volume(object):
+
+ # Housekeeping functions.
+
+ def __init__ (self, host, volid, proto="tcp", port=24007):
+ # Add a reference so the module-level variable "api" doesn't
+ # get yanked out from under us (see comment above File def'n).
+ self._api = api
+ self.fs = api.glfs_new(volid)
+ api.glfs_set_volfile_server(self.fs,proto,host,port)
+
+ def __del__ (self):
+ self._api.glfs_fini(self.fs)
+ self._api = None
+
+ def set_logging (self, path, level):
+ api.glfs_set_logging(self.fs,path,level)
+
+ def mount (self):
+ api.glfs_init(self.fs)
+
+ # File operations, in alphabetical order.
+
+ def creat (self, path, flags, mode):
+ fd = api.glfs_creat(self.fs,path,flags,mode)
+ if not fd:
+ return fd
+ return File(fd)
+
+ def getxattr (self, path, key, maxlen):
+ buf = create_string_buffer(maxlen)
+ rc = api.glfs_getxattr(self.fs,path,key,buf,maxlen)
+ if rc < 0:
+ return rc
+ return buf.value[:rc]
+
+ def listxattr (self, path):
+ buf = create_string_buffer(512)
+ rc = api.glfs_listxattr(self.fs,path,buf,512)
+ if rc < 0:
+ return rc
+ xattrs = []
+ # Parsing character by character is ugly, but it seems like the
+ # easiest way to deal with the "strings separated by NUL in one
+ # buffer" format.
+ i = 0
+ while i < rc:
+ new_xa = buf.raw[i]
+ i += 1
+ while i < rc:
+ next_char = buf.raw[i]
+ i += 1
+ if next_char == '\0':
+ xattrs.append(new_xa)
+ break
+ new_xa += next_char
+ xattrs.sort()
+ return xattrs
+
+ def lstat (self, path):
+ x = Stat()
+ rc = api.glfs_lstat(self.fs,path,byref(x))
+ if rc >= 0:
+ return x
+ else:
+ return rc
+
+ def mkdir (self, path):
+ return api.glfs_mkdir(self.fs,path)
+
+ def open (self, path, flags):
+ fd = api.glfs_open(self.fs,path,flags)
+ if not fd:
+ return fd
+ return File(fd)
+
+ def opendir (self, path):
+ fd = api.glfs_opendir(self.fs,path)
+ if not fd:
+ return fd
+ return Dir(fd)
+
+ def rename (self, opath, npath):
+ return api.glfs_rename(self.fs,opath,npath)
+
+ def rmdir (self, path):
+ return api.glfs_rmdir(self.fs,path)
+
+ def setxattr (self, path, key, value, vlen):
+ return api.glfs_setxattr(self.fs,path,key,value,vlen,0)
+
+ def unlink (self, path):
+ return api.glfs_unlink(self.fs,path)
+
+if __name__ == "__main__":
+ def test_create_write (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.creat(mypath,os.O_WRONLY|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat error"
+ rc = fd.write(data)
+ if rc != len(data):
+ return False, "wrote %d/%d bytes" % (rc, len(data))
+ return True, "wrote %d bytes" % rc
+
+ # TBD: this test fails if we do create, open, write, read
+ def test_open_read (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.open(mypath,os.O_RDONLY)
+ if not fd:
+ return False, "open error"
+ dlen = len(data) * 2
+ buf = fd.read(dlen)
+ if type(buf) == types.IntType:
+ return False, "read error %d" % buf
+ if len(buf) != len(data):
+ return False, "read %d/%d bytes" % (len(buf), len(data))
+ return True, "read '%s'" % buf
+
+ def test_lstat (vol, path, data):
+ mypath = path + ".io"
+ sb = vol.lstat(mypath)
+ if type(sb) == types.IntType:
+ return False, "lstat error %d" % sb
+ if sb.st_size != len(data):
+ return False, "lstat size is %d, expected %d" % (
+ sb.st_size, len(data))
+ return True, "lstat got correct size %d" % sb.st_size
+
+ def test_rename (vol, path, data):
+ opath = path + ".io"
+ npath = path + ".tmp"
+ rc = vol.rename(opath,npath)
+ if rc < 0:
+ return False, "rename error %d" % rc
+ ofd = vol.open(opath,os.O_RDWR)
+ if isinstance(ofd,File):
+ return False, "old path working after rename"
+ nfd = vol.open(npath,os.O_RDWR)
+ if isinstance(nfd,File):
+ return False, "new path not working after rename"
+ return True, "rename worked"
+
+ def test_unlink (vol, path, data):
+ mypath = path + ".tmp"
+ rc = vol.unlink(mypath)
+ if rc < 0:
+ return False, "unlink error %d" % fd
+ fd = vol.open(mypath,os.O_RDWR)
+ if isinstance(fd,File):
+ return False, "path still usable after unlink"
+ return True, "unlink worked"
+
+ def test_mkdir (vol, path, data):
+ mypath = path + ".dir"
+ rc = vol.mkdir(mypath)
+ if rc < 0:
+ return False, "mkdir error %d" % rc
+ return True, "mkdir worked"
+
+ def test_create_in_dir (vol, path, data):
+ mypath = path + ".dir/probe"
+ fd = vol.creat(mypath,os.O_RDWR,0644)
+ if not isinstance(fd,File):
+ return False, "create (in dir) error"
+ return True, "create (in dir) worked"
+
+ def test_dir_listing (vol, path, data):
+ mypath = path + ".dir"
+ fd = vol.opendir(mypath)
+ if not isinstance(fd,Dir):
+ return False, "opendir error %d" % fd
+ files = []
+ while True:
+ ent = fd.next()
+ if not isinstance(ent,Dirent):
+ break
+ name = ent.d_name[:ent.d_reclen]
+ files.append(name)
+ if files != [".", "..", "probe"]:
+ return False, "wrong directory contents"
+ return True, "directory listing worked"
+
+ def test_unlink_in_dir (vol, path, data):
+ mypath = path + ".dir/probe"
+ rc = vol.unlink(mypath)
+ if rc < 0:
+ return False, "unlink (in dir) error %d" % rc
+ return True, "unlink (in dir) worked"
+
+ def test_rmdir (vol, path, data):
+ mypath = path + ".dir"
+ rc = vol.rmdir(mypath)
+ if rc < 0:
+ return False, "rmdir error %d" % rc
+ sb = vol.lstat(mypath)
+ if not isinstance(sb,Stat):
+ return False, "dir still there after rmdir"
+ return True, "rmdir worked"
+
+ def test_setxattr (vol, path, data):
+ mypath = path + ".xa"
+ fd = vol.creat(mypath,os.O_RDWR|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat (xattr test) error"
+ key1, key2 = "hello", "goodbye"
+ if vol.setxattr(mypath,"trusted.key1",key1,len(key1)) < 0:
+ return False, "setxattr (key1) error"
+ if vol.setxattr(mypath,"trusted.key2",key2,len(key2)) < 0:
+ return False, "setxattr (key2) error"
+ return True, "setxattr worked"
+
+ def test_getxattr (vol, path, data):
+ mypath = path + ".xa"
+ buf = vol.getxattr(mypath,"trusted.key1",32)
+ if type(buf) == types.IntType:
+ return False, "getxattr error"
+ if buf != "hello":
+ return False, "wrong getxattr value %s" % buf
+ return True, "getxattr worked"
+
+ def test_listxattr (vol, path, data):
+ mypath = path + ".xa"
+ xattrs = vol.listxattr(mypath)
+ if type(xattrs) == types.IntType:
+ return False, "listxattr error"
+ if xattrs != ["trusted.key1","trusted.key2"]:
+ return False, "wrong listxattr value %s" % repr(xattrs)
+ return True, "listxattr worked"
+
+ def test_fallocate (vol, path, data):
+ mypath = path + ".io"
+ fd = vol.creat(mypath,os.O_WRONLY|os.O_EXCL,0644)
+ if not fd:
+ return False, "creat error"
+ rc = fd.fallocate(0, 0, 1024*1024)
+ if rc != 0:
+ return False, "fallocate error"
+ rc = fd.discard(4096, 4096)
+ if rc != 0:
+ return False, "discard error"
+ return True, "fallocate/discard worked"
+
+ test_list = (
+ test_create_write,
+ test_open_read,
+ test_lstat,
+ test_rename,
+ test_unlink,
+ test_mkdir,
+ test_create_in_dir,
+ test_dir_listing,
+ test_unlink_in_dir,
+ test_rmdir,
+ test_setxattr,
+ test_getxattr,
+ test_listxattr,
+ test_fallocate,
+ )
+
+ ok_to_fail = (
+ # TBD: this fails opening the new file, even though the file
+ # did get renamed. Looks like a gfapi bug, not ours.
+ (test_rename, "new path not working after rename"),
+ # TBD: similar, call returns error even though it worked
+ (test_rmdir, "dir still there after rmdir"),
+ )
+
+ volid, path = sys.argv[1:3]
+ data = "fubar"
+ vol = Volume("localhost",volid)
+ vol.set_logging("/dev/null",7)
+ #vol.set_logging("/dev/stderr",7)
+ vol.mount()
+
+ failures = 0
+ expected = 0
+ for t in test_list:
+ rc, msg = t(vol,path,data)
+ if rc:
+ print "PASS: %s" % msg
+ else:
+ print "FAIL: %s" % msg
+ failures += 1
+ for otf in ok_to_fail:
+ if (t == otf[0]) and (msg == otf[1]):
+ print " (skipping known failure)"
+ expected += 1
+ break # from the *inner* for loop
+ else:
+ break # from the *outer* for loop
+
+ print "%d failures (%d expected)" % (failures, expected)
diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c
new file mode 100644
index 000000000..600d72fb5
--- /dev/null
+++ b/api/examples/glfsxmp.c
@@ -0,0 +1,1598 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "api/glfs.h"
+#include "api/glfs-handles.h"
+#include <string.h>
+#include <time.h>
+
+
+/*
+ * List the entries of "/" on stderr together with the directory offset
+ * reported by glfs_telldir after each read.
+ *
+ * Returns 0 on success, -1 if the directory cannot be opened.
+ */
+int
+test_dirops (glfs_t *fs)
+{
+        glfs_fd_t *fd = NULL;
+        /* The union keeps the original 512 bytes of room while giving the
+         * storage the alignment of struct dirent (a bare char array cast
+         * to struct dirent * is not guaranteed to be suitably aligned). */
+        union {
+                struct dirent de;
+                char          pad[512];
+        } buf;
+        struct dirent *entry = NULL;
+
+        fd = glfs_opendir (fs, "/");
+        if (!fd) {
+                fprintf (stderr, "/: %s\n", strerror (errno));
+                return -1;
+        }
+
+        fprintf (stderr, "Entries:\n");
+        /* Stop on a read error as well as at end of directory, so a failed
+         * call can never leave us looping on a stale entry pointer. */
+        while (glfs_readdir_r (fd, &buf.de, &entry) >= 0 && entry) {
+                fprintf (stderr, "%s: %ld\n", entry->d_name,
+                         (long) glfs_telldir (fd));
+        }
+
+        glfs_closedir (fd);
+        return 0;
+}
+
+
+/*
+ * Set two user xattrs on /filename2, then list the xattr names and print
+ * each one. The result of every call is logged to stderr.
+ *
+ * Returns 0 on success, -1 if listing the xattrs fails.
+ */
+int
+test_xattr (glfs_t *fs)
+{
+        char *filename = "/filename2";
+        char  names[512];
+        char *key = NULL;
+        int   rc;
+
+        rc = glfs_setxattr (fs, filename, "user.testkey", "testval", 8, 0);
+        fprintf (stderr, "setxattr(%s): %d (%s)\n", filename, rc,
+                 strerror (errno));
+
+        rc = glfs_setxattr (fs, filename, "user.testkey2", "testval", 8, 0);
+        fprintf (stderr, "setxattr(%s): %d (%s)\n", filename, rc,
+                 strerror (errno));
+
+        rc = glfs_listxattr (fs, filename, names, 512);
+        fprintf (stderr, "listxattr(%s): %d (%s)\n", filename, rc,
+                 strerror (errno));
+        if (rc < 0)
+                return -1;
+
+        /* The list is a run of NUL-terminated names packed back to back;
+         * step over each name and its terminator. */
+        key = names;
+        while (key < names + rc) {
+                printf ("key=%s\n", key);
+                key += strlen (key) + 1;
+        }
+
+        return 0;
+}
+
+
+int
+test_chdir (glfs_t *fs)
+{
+ int ret = -1;
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir (fs, topdir, 0755);
+ if (ret) {
+ fprintf (stderr, "mkdir(%s): %s\n", topdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink (fs, topdir, linkdir);
+ if (ret) {
+ fprintf (stderr, "symlink(%s, %s): %s\n", topdir, linkdir, strerror (errno));
+ return -1;
+ }
+
+ ret = glfs_chdir (fs, linkdir);
+ if (ret) {
+ fprintf (stderr, "chdir(%s): %s\n", linkdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath (fs, subdir, pathbuf);
+ if (respath) {
+ fprintf (stderr, "realpath(%s) worked unexpectedly: %s\n", subdir, respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir (fs, subdir, 0755);
+ if (ret) {
+ fprintf (stderr, "mkdir(%s): %s\n", subdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_realpath (fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf (stderr, "realpath(%s): %s\n", subdir, strerror (errno));
+ } else {
+ fprintf (stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir (fs, subdir);
+ if (ret) {
+ fprintf (stderr, "chdir(%s): %s\n", subdir, strerror (errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd (fs, pathbuf, 4096);
+ fprintf (stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath (fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf (stderr, "realpath(/linkdir/subdir): %s\n", strerror (errno));
+ } else {
+ fprintf (stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat (struct stat *sb)
+{
+ printf ("Dumping stat information:\n");
+ printf ("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK: printf ("block device\n"); break;
+ case S_IFCHR: printf ("character device\n"); break;
+ case S_IFDIR: printf ("directory\n"); break;
+ case S_IFIFO: printf ("FIFO/pipe\n"); break;
+ case S_IFLNK: printf ("symlink\n"); break;
+ case S_IFREG: printf ("regular file\n"); break;
+ case S_IFSOCK: printf ("socket\n"); break;
+ default: printf ("unknown?\n"); break;
+ }
+
+ printf ("I-node number: %ld\n", (long) sb->st_ino);
+
+ printf ("Mode: %lo (octal)\n",
+ (unsigned long) sb->st_mode);
+
+ printf ("Link count: %ld\n", (long) sb->st_nlink);
+ printf ("Ownership: UID=%ld GID=%ld\n",
+ (long) sb->st_uid, (long) sb->st_gid);
+
+ printf ("Preferred I/O block size: %ld bytes\n",
+ (long) sb->st_blksize);
+ printf ("File size: %lld bytes\n",
+ (long long) sb->st_size);
+ printf ("Blocks allocated: %lld\n",
+ (long long) sb->st_blocks);
+
+ printf ("Last status change: %s", ctime(&sb->st_ctime));
+ printf ("Last file access: %s", ctime(&sb->st_atime));
+ printf ("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle (unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++)
+ {
+ printf (":%02x:", glid[i]);
+ }
+ printf ("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat (struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle (unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+/*
+ * Exercise glfs_h_unlink through the handle API: build
+ * <full_parent_name>/unlinkdir/{file.txt,dir1/file.txt}, then check that
+ * unlinking a non-empty directory fails with ENOTEMPTY, that files and
+ * empty directories can be unlinked, and that unlinking a missing entry
+ * fails with ENOENT. Prints PASSED/FAILED; every handle obtained is
+ * closed on all exit paths.
+ */
+void
+test_h_unlink (void)
+{
+        char               *my_dir = "unlinkdir";
+        char               *my_file = "file.txt";
+        char               *my_subdir = "dir1";
+        struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+                           *subdir = NULL, *subleaf = NULL;
+        struct stat         sb;
+        int                 ret;
+
+        printf ("glfs_h_unlink tests: In Progress\n");
+
+        /* Prepare tests */
+        parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, NULL, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+        if (dir == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         my_dir, parent, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        leaf = glfs_h_creat (fs, dir, my_file, O_CREAT, 0644, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, dir, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        subdir = glfs_h_mkdir (fs, dir, my_subdir, 0644, &sb);
+        if (subdir == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         my_subdir, dir, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        subleaf = glfs_h_creat (fs, subdir, my_file, O_CREAT, 0644, &sb);
+        if (subleaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, subdir, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        /* unlink non empty directory: must fail with ENOTEMPTY */
+        ret = glfs_h_unlink (fs, dir, my_subdir);
+        if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+                fprintf (stderr, "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+                         my_subdir, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        /* unlink regular file */
+        ret = glfs_h_unlink (fs, subdir, my_file);
+        if (ret) {
+                fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+                         my_file, subdir, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        /* unlink directory */
+        ret = glfs_h_unlink (fs, dir, my_subdir);
+        if (ret) {
+                fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+                         my_subdir, dir, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        /* unlink regular file */
+        ret = glfs_h_unlink (fs, dir, my_file);
+        if (ret) {
+                fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+                         my_file, dir, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        /* unlink non-existant regular file: must fail with ENOENT */
+        ret = glfs_h_unlink (fs, dir, my_file);
+        if ((ret && errno != ENOENT) || (ret == 0)) {
+                fprintf (stderr, "glfs_h_unlink: error unlinking non-existant %s: invalid errno ,%d, %s\n",
+                         my_file, ret, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        /* unlink non-existant directory: must fail with ENOENT */
+        ret = glfs_h_unlink (fs, dir, my_subdir);
+        if ((ret && errno != ENOENT) || (ret == 0)) {
+                fprintf (stderr, "glfs_h_unlink: error unlinking non-existant %s: invalid errno ,%d, %s\n",
+                         my_subdir, ret, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        /* unlink directory (report the handle the unlink was issued on) */
+        ret = glfs_h_unlink (fs, parent, my_dir);
+        if (ret) {
+                fprintf (stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+                         my_dir, parent, strerror (errno));
+                printf ("glfs_h_unlink tests: FAILED\n");
+                goto out;
+        }
+
+        printf ("glfs_h_unlink tests: PASSED\n");
+
+out:
+        if (dir)
+                glfs_h_close (dir);
+        if (leaf)
+                glfs_h_close (leaf);
+        if (subdir)
+                glfs_h_close (subdir);
+        if (subleaf)
+                glfs_h_close (subleaf);
+        if (parent)
+                glfs_h_close (parent);
+
+        return;
+}
+
+/*
+ * Exercise glfs_h_getattrs, glfs_h_setattrs and glfs_h_stat on a
+ * directory created under full_parent_name: read its attributes, set
+ * mode/uid/gid/atime/mtime, and read them back. Prints PASSED/FAILED;
+ * every handle obtained is closed on all exit paths.
+ */
+void
+test_h_getsetattrs (void)
+{
+        char               *my_dir = "attrdir";
+        char               *my_file = "attrfile.txt";
+        struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+        struct stat         sb, retsb;
+        int                 ret, valid;
+        struct timespec     timestamp;
+
+        printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+        /* Prepare tests */
+        parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, NULL, strerror (errno));
+                printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+        if (dir == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         my_dir, parent, strerror (errno));
+                printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_creat (fs, dir, my_file, O_CREAT, 0644, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, dir, strerror (errno));
+                printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        ret = glfs_h_getattrs (fs, dir, &retsb);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_getattrs: error %s: from (%p),%s\n",
+                         my_dir, dir, strerror (errno));
+                printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&retsb);
+        /* TODO: Compare stat information */
+
+        /* Reuse the fetched stat as the template for the new attributes */
+        retsb.st_mode = 00666;
+        retsb.st_uid = 1000;
+        retsb.st_gid = 1001;
+        ret = clock_gettime (CLOCK_REALTIME, &timestamp);
+        if(ret != 0) {
+                fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+                printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+                goto out;
+        }
+        retsb.st_atim = timestamp;
+        retsb.st_mtim = timestamp;
+        valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+                GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+        peek_stat (&retsb);
+
+        ret = glfs_h_setattrs (fs, dir, &retsb, valid);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_setattrs: error %s: from (%p),%s\n",
+                         my_dir, dir, strerror (errno));
+                printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+                goto out;
+        }
+
+        /* Clear the buffer so the values shown come from glfs_h_stat */
+        memset(&retsb, 0, sizeof (struct stat));
+        ret = glfs_h_stat (fs, dir, &retsb);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_stat: error %s: from (%p),%s\n",
+                         my_dir, dir, strerror (errno));
+                printf ("glfs_h_getattrs and setattrs tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&retsb);
+
+        printf ("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+        if (parent)
+                glfs_h_close (parent);
+        if (leaf)
+                glfs_h_close (leaf);
+        if (dir)
+                glfs_h_close (dir);
+
+        return;
+}
+
+/*
+ * Truncate the object behind 'leaf' to 'offset' with glfs_h_truncate and
+ * confirm through glfs_h_getattrs that the reported size matches.
+ * 'name' is only used in diagnostics.
+ *
+ * Returns 0 on success, -1 after printing a diagnostic on failure.
+ */
+static int
+h_truncate_and_verify (struct glfs_object *leaf, const char *name,
+                       off_t offset)
+{
+        struct stat sb;
+        int         ret;
+
+        ret = glfs_h_truncate (fs, leaf, offset);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_truncate: error truncating %s: from (%p),%s\n",
+                         name, (void *) leaf, strerror (errno));
+                return -1;
+        }
+
+        ret = glfs_h_getattrs (fs, leaf, &sb);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_getattrs: error for %s (%p),%s\n",
+                         name, (void *) leaf, strerror (errno));
+                return -1;
+        }
+
+        if (sb.st_size != offset) {
+                fprintf (stderr, "glfs_h_truncate: post size mismatch\n");
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Exercise glfs_h_truncate: create <full_parent_name>/truncatedir/file.txt,
+ * write 32 bytes to it, then truncate to a smaller size, a larger size
+ * and the current size, verifying the resulting size each time.
+ * Prints PASSED/FAILED; the fd and every handle are released on all
+ * exit paths.
+ */
+void
+test_h_truncate (void)
+{
+        char               *my_dir = "truncatedir";
+        char               *my_file = "file.txt";
+        struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+        struct stat         sb;
+        glfs_fd_t          *fd = NULL;
+        char                buf[32];
+        int                 ret = 0;
+
+        printf("glfs_h_truncate tests: In Progress\n");
+
+        /* Prepare tests */
+        root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (root == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, NULL, strerror (errno));
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         my_dir, root, strerror (errno));
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, parent, strerror (errno));
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        fd = glfs_h_open (fs, leaf, O_RDWR);
+        if (fd == NULL) {
+                fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+                         my_file, strerror (errno));
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+
+        /* Seed the file with exactly 32 bytes; the size checks below rely
+         * on it, so a short or failed write must fail the test. */
+        memcpy (buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+        ret = glfs_write (fd, buf, 32, 0);
+        if (ret != 32) {
+                fprintf (stderr, "glfs_write: error writing to %s: %d, %s\n",
+                         my_file, ret, strerror (errno));
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+
+        /* run tests */
+        /* truncate lower: 32 -> 30 */
+        if (h_truncate_and_verify (leaf, my_file, 30) != 0) {
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+
+        /* truncate higher: 30 -> 32 */
+        if (h_truncate_and_verify (leaf, my_file, 32) != 0) {
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+
+        /* truncate equal: size is already 32, so truncate to 32 again */
+        if (h_truncate_and_verify (leaf, my_file, 32) != 0) {
+                printf ("glfs_h_truncate tests: FAILED\n");
+                goto out;
+        }
+
+        printf ("glfs_h_truncate tests: PASSED\n");
+out:
+        if (fd)
+                glfs_close (fd);
+        if (root)
+                glfs_h_close (root);
+        if (parent)
+                glfs_h_close (parent);
+        if (leaf)
+                glfs_h_close (leaf);
+
+        return;
+}
+
+/*
+ * Exercise the handle based link APIs (glfs_h_symlink, glfs_h_readlink and
+ * glfs_h_link) against the file-scope 'fs' handle.
+ *
+ * Layout created under full_parent_name:
+ *   linkdir/file.txt        (leaf)
+ *   linkdir/dir1/file.txt   (dleaf)
+ *   linkdir/dir2/
+ * followed by the symlink linkdir/slnk.txt -> ./file.txt, a hard link
+ * linkdir/lnk.txt and a hard link linkdir/dir2/lnk.txt.
+ *
+ * The outcome is reported on stdout as PASSED or FAILED; every handle and
+ * the readlink buffer are released on all paths.
+ */
+void
+test_h_links (void)
+{
+        char               *my_dir = "linkdir";
+        char               *my_file = "file.txt";
+        char               *my_symlnk = "slnk.txt";
+        char               *my_symlnk_tgt = "./file.txt";
+        char               *my_lnk = "lnk.txt";
+        char               *linksrc_dir = "dir1";
+        char               *linktgt_dir = "dir2";
+        struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+                           *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+        struct glfs_object *ln1 = NULL;
+        struct stat         sb;
+        int                 ret;
+        char               *buf = NULL;
+        const size_t        buf_sz = 1024;
+
+        printf("glfs_h_link(s) tests: In Progress\n");
+
+        /* Prepare tests */
+        root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (root == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, NULL, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         my_dir, root, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, parent, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dirsrc = glfs_h_mkdir (fs, parent, linksrc_dir, 0644, &sb);
+        if (dirsrc == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         linksrc_dir, parent, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dirtgt = glfs_h_mkdir (fs, parent, linktgt_dir, 0644, &sb);
+        if (dirtgt == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         linktgt_dir, parent, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dleaf = glfs_h_creat (fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+        if (dleaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, dirsrc, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* run tests */
+        /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+        ln1 = glfs_h_symlink (fs, parent, my_symlnk, my_symlnk_tgt, &sb);
+        if (ln1 == NULL) {
+                fprintf (stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+                         my_symlnk, parent, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        buf = calloc (buf_sz, sizeof(char));
+        if (buf == NULL) {
+                fprintf (stderr, "Error allocating memory\n");
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+
+        /* Ask for one byte less than the zeroed buffer holds so the result
+         * is NUL terminated whatever length comes back. */
+        ret = glfs_h_readlink (fs, ln1, buf, buf_sz - 1);
+        if (ret <= 0) {
+                fprintf (stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+                         my_symlnk, ln1, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        /* The value read back must be the target the symlink was created
+         * with; the terminator is included so a longer value is a mismatch. */
+        if (strncmp (buf, my_symlnk_tgt, strlen (my_symlnk_tgt) + 1) != 0) {
+                fprintf (stderr, "glfs_h_readlink: error mismatch in link name: actual %s: retrieved %s\n",
+                         my_symlnk_tgt, buf);
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+
+        /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+        ret = glfs_h_link (fs, leaf, parent, my_lnk);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+                         my_lnk, parent, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        /* TODO: Should write content to a file and read from the link */
+
+        /* link: /testdir/linkdir/dir1/file.txt to ../dir2/lnk.txt */
+        ret = glfs_h_link (fs, dleaf, dirtgt, my_lnk);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+                         my_lnk, dirtgt, strerror (errno));
+                printf ("glfs_h_link(s) tests: FAILED\n");
+                goto out;
+        }
+        /* TODO: Should write content to a file and read from the link */
+
+        printf ("glfs_h_link(s) tests: PASSED\n");
+
+out:
+        if (root)
+                glfs_h_close (root);
+        if (parent)
+                glfs_h_close (parent);
+        if (leaf)
+                glfs_h_close (leaf);
+        if (dirsrc)
+                glfs_h_close (dirsrc);
+        if (dirtgt)
+                glfs_h_close (dirtgt);
+        if (dleaf)
+                glfs_h_close (dleaf);
+        if (ln1)
+                glfs_h_close (ln1);
+        free (buf);
+
+        return;
+}
+
+/*
+ * Exercise glfs_h_rename on the file-scope 'fs' handle.
+ *
+ * Creates renamedir/{file.txt, dir1/file.txt, dir2} under full_parent_name,
+ * then issues five renames that are expected to succeed followed by two
+ * (file -> directory, directory -> file) that are expected to fail.
+ * Reports PASSED or FAILED on stdout and closes every handle it opened.
+ */
+void
+test_h_rename (void)
+{
+        char               *my_dir = "renamedir";
+        char               *my_file = "file.txt";
+        char               *src_dir = "dir1";
+        char               *tgt_dir = "dir2";
+        struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+                           *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+        struct stat         sb;
+        int                 ret;
+
+        printf("glfs_h_rename tests: In Progress\n");
+
+        /* Prepare tests */
+        root = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (root == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, NULL, strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        parent = glfs_h_mkdir (fs, root, my_dir, 0644, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         my_dir, root, strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_creat (fs, parent, my_file, O_CREAT, 0644, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, parent, strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dirsrc = glfs_h_mkdir (fs, parent, src_dir, 0644, &sb);
+        if (dirsrc == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         src_dir, parent, strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dirtgt = glfs_h_mkdir (fs, parent, tgt_dir, 0644, &sb);
+        if (dirtgt == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         tgt_dir, parent, strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        dleaf = glfs_h_creat (fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+        if (dleaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                         my_file, dirsrc, strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* run tests */
+        /* Rename file.txt -> file1.txt */
+        ret = glfs_h_rename (fs, parent, "file.txt", parent, "file1.txt");
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+                         "file.txt", "file1.txt", strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+
+        /* rename dir1/file.txt -> file.txt */
+        ret = glfs_h_rename (fs, dirsrc, "file.txt", parent, "file.txt");
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+                         src_dir, "file.txt", "file.txt", strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+
+        /* rename file1.txt -> file.txt (exists) */
+        ret = glfs_h_rename (fs, parent, "file1.txt", parent, "file.txt");
+        if (ret != 0) {
+                /* report the names that were actually passed to the call */
+                fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+                         "file1.txt", "file.txt", strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+
+        /* rename dir1 -> dir3 */
+        ret = glfs_h_rename (fs, parent, "dir1", parent, "dir3");
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+                         "dir1", "dir3", strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+
+        /* rename dir2 ->dir3 (exists) */
+        ret = glfs_h_rename (fs, parent, "dir2", parent, "dir3");
+        if (ret != 0) {
+                fprintf (stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+                         "dir2", "dir3", strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+
+        /* rename file.txt -> dir3 (fail) */
+        ret = glfs_h_rename (fs, parent, "file.txt", parent, "dir3");
+        if (ret == 0) {
+                fprintf (stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+                         "file.txt", "dir3", strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+
+        /* rename dir3 -> file.txt (fail) */
+        ret = glfs_h_rename (fs, parent, "dir3", parent, "file.txt");
+        if (ret == 0) {
+                fprintf (stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+                         "dir3", "file.txt", strerror (errno));
+                printf ("glfs_h_rename tests: FAILED\n");
+                goto out;
+        }
+
+        printf ("glfs_h_rename tests: PASSED\n");
+
+out:
+        if (root)
+                glfs_h_close (root);
+        if (parent)
+                glfs_h_close (parent);
+        if (leaf)
+                glfs_h_close (leaf);
+        if (dirsrc)
+                glfs_h_close (dirsrc);
+        if (dirtgt)
+                glfs_h_close (dirtgt);
+        if (dleaf)
+                glfs_h_close (dleaf);
+
+        return;
+}
+
+/*
+ * Add the interval from ts_st to ts_ed onto the running total in *ts.
+ *
+ * ts     running total, updated in place
+ * ts_st  start of the interval
+ * ts_ed  end of the interval
+ *
+ * A negative nanosecond difference borrows one second from the seconds
+ * difference. After the addition a single carry moves one full second out
+ * of tv_nsec, so a total that entered with tv_nsec below one second leaves
+ * with tv_nsec below one second as well.
+ */
+void
+assimilatetime (struct timespec *ts, struct timespec ts_st,
+                struct timespec ts_ed)
+{
+        if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+                ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+                ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+        } else {
+                ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+                ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+        }
+
+        /* >= rather than >: exactly 1000000000 ns is a whole second and
+         * has to be carried too */
+        if (ts->tv_nsec >= 1000000000) {
+                ts->tv_nsec = ts->tv_nsec - 1000000000;
+                ts->tv_sec += 1;
+        }
+
+        return;
+}
+
+/* Number of empty files created by each timing run below */
+#define MAX_FILES_CREATE 10
+/* Size of the scratch buffer the names and paths below are formatted into */
+#define MAXPATHNAME 512
+/*
+ * Time the creation of MAX_FILES_CREATE empty files twice on the file-scope
+ * 'fs' handle: first with the handle based calls (glfs_h_lookupat +
+ * glfs_h_creat inside a fresh "perftest" directory), then with the path
+ * based calls (glfs_stat + glfs_creat inside "/testdir/perftest1").
+ *
+ * For each run the overall wall-clock time and the time accumulated around
+ * the individual lookup/create pairs are printed on stdout. Any failure is
+ * reported as "glfs_h_performance tests: FAILED". Every handle and fd that
+ * is still open is released before returning.
+ */
+void
+test_h_performance (void)
+{
+        char               *my_dir = "perftest",
+                           *full_dir_path="/testdir/perftest";
+        char               *my_file = "file_", my_file_name[MAXPATHNAME];
+        struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+        struct stat         sb;
+        int                 ret, i;
+        struct glfs_fd     *fd = NULL;
+        struct timespec     c_ts = {0, 0}, c_ts_st, c_ts_ed;
+        struct timespec     o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+        printf("glfs_h_performance tests: In Progress\n");
+
+        /* Prepare tests */
+        parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, NULL, strerror (errno));
+                printf ("glfs_h_performance tests: FAILED\n");
+                goto out;
+        }
+
+        dir = glfs_h_mkdir (fs, parent, my_dir, 0644, &sb);
+        if (dir == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+                         my_dir, parent, strerror (errno));
+                printf ("glfs_h_performance tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* create performance */
+        ret = clock_gettime (CLOCK_REALTIME, &o_ts_st);
+        if(ret != 0) {
+                fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+                printf ("glfs_h_performance tests: FAILED\n");
+                goto out;
+        }
+
+        for (i = 0; i < MAX_FILES_CREATE; i++) {
+                snprintf (my_file_name, sizeof (my_file_name), "%s%d",
+                          my_file, i);
+
+                ret = clock_gettime (CLOCK_REALTIME, &c_ts_st);
+                if(ret != 0) {
+                        fprintf (stderr, "clock_gettime: error %s\n",
+                                 strerror (errno));
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                /* the file must not exist yet; a handle returned here is
+                 * released at 'out' */
+                leaf = glfs_h_lookupat (fs, dir, my_file_name, &sb);
+                if (leaf != NULL) {
+                        fprintf (stderr, "glfs_h_lookup: exists %s\n",
+                                 my_file_name);
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                leaf = glfs_h_creat (fs, dir, my_file_name, O_CREAT, 0644, &sb);
+                if (leaf == NULL) {
+                        fprintf (stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+                                 my_file_name, dir, strerror (errno));
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                ret = clock_gettime (CLOCK_REALTIME, &c_ts_ed);
+                if(ret != 0) {
+                        fprintf (stderr, "clock_gettime: error %s\n",
+                                 strerror (errno));
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                assimilatetime (&c_ts, c_ts_st, c_ts_ed);
+                glfs_h_close (leaf); leaf = NULL;
+        }
+
+        ret = clock_gettime (CLOCK_REALTIME, &o_ts_ed);
+        if(ret != 0) {
+                fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+                printf ("glfs_h_performance tests: FAILED\n");
+                goto out;
+        }
+
+        assimilatetime (&o_ts, o_ts_st, o_ts_ed);
+
+        printf ("Creation performance (handle based):\n\t# empty files:%d\n",
+                MAX_FILES_CREATE);
+        printf ("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+                (long) o_ts.tv_sec, o_ts.tv_nsec);
+        printf ("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+                (long) c_ts.tv_sec, c_ts.tv_nsec);
+
+        /* create using path */
+        c_ts.tv_sec = o_ts.tv_sec = 0;
+        c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+        snprintf (my_file_name, sizeof (my_file_name), "%s1", full_dir_path);
+        ret = glfs_mkdir (fs, my_file_name, 0644);
+        if (ret != 0) {
+                fprintf (stderr, "glfs_mkdir: error creating %s: from (%p),%s\n",
+                         my_file_name, parent, strerror (errno));
+                printf ("glfs_h_performance tests: FAILED\n");
+                goto out;
+        }
+        /* NOTE(review): glfs_mkdir takes no stat buffer, so sb still holds
+         * whatever the last handle based call left in it */
+        peek_stat (&sb);
+
+        ret = clock_gettime (CLOCK_REALTIME, &o_ts_st);
+        if(ret != 0) {
+                fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+                printf ("glfs_h_performance tests: FAILED\n");
+                goto out;
+        }
+
+        for (i = 0; i < MAX_FILES_CREATE; i++) {
+                snprintf (my_file_name, sizeof (my_file_name), "%s1/%sn%d",
+                          full_dir_path, my_file, i);
+
+                ret = clock_gettime (CLOCK_REALTIME, &c_ts_st);
+                if(ret != 0) {
+                        fprintf (stderr, "clock_gettime: error %s\n",
+                                 strerror (errno));
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                ret = glfs_stat (fs, my_file_name, &sb);
+                if (ret == 0) {
+                        fprintf (stderr, "glfs_stat: exists %s\n",
+                                 my_file_name);
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                fd = glfs_creat (fs, my_file_name, O_CREAT, 0644);
+                if (fd == NULL) {
+                        fprintf (stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+                                 my_file_name, dir, strerror (errno));
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                ret = clock_gettime (CLOCK_REALTIME, &c_ts_ed);
+                if(ret != 0) {
+                        fprintf (stderr, "clock_gettime: error %s\n",
+                                 strerror (errno));
+                        printf ("glfs_h_performance tests: FAILED\n");
+                        goto out;
+                }
+
+                assimilatetime (&c_ts, c_ts_st, c_ts_ed);
+                glfs_close (fd); fd = NULL;
+        }
+
+        ret = clock_gettime (CLOCK_REALTIME, &o_ts_ed);
+        if(ret != 0) {
+                fprintf (stderr, "clock_gettime: error %s\n", strerror (errno));
+                printf ("glfs_h_performance tests: FAILED\n");
+                goto out;
+        }
+
+        assimilatetime (&o_ts, o_ts_st, o_ts_ed);
+
+        printf ("Creation performance (path based):\n\t# empty files:%d\n",
+                MAX_FILES_CREATE);
+        printf ("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+                (long) o_ts.tv_sec, o_ts.tv_nsec);
+        printf ("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+                (long) c_ts.tv_sec, c_ts.tv_nsec);
+out:
+        /* release whatever is still open, on success and failure alike */
+        if (fd)
+                glfs_close (fd);
+        if (leaf)
+                glfs_h_close (leaf);
+        if (dir)
+                glfs_h_close (dir);
+        if (parent)
+                glfs_h_close (parent);
+
+        return;
+}
+
+/*
+ * Run the handle based API test groups against the file-scope 'fs' handle.
+ *
+ * Creates the test directory (full_parent_name) and a file inside it, then
+ * runs, in order: lookupat, open + read/write, creat, extract_handle /
+ * create_from_handle, mkdir and mknod checks, followed by the unlink,
+ * get/setattrs, truncate, links, rename and performance test functions.
+ *
+ * argc  unused
+ * argv  argv[1] is printed as the volume name in progress messages
+ *
+ * Each group prints "In Progress" and then PASSED or FAILED on stdout; the
+ * first failure jumps to cleanup and skips the remaining groups. The
+ * return value is whatever the last call that assigned 'ret' produced.
+ */
+int
+test_handleops (int argc, char *argv[])
+{
+        int                 ret = 0;
+        glfs_fd_t          *fd = NULL;
+        struct stat         sb = {0, };
+        struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+                           *tmp = NULL;
+        /* one spare, always-zero byte so the 32 byte payloads can be
+         * printed with %s */
+        char                readbuf[32 + 1] = {0, }, writebuf[32 + 1] = {0, };
+        unsigned char       leaf_handle[GFAPI_HANDLE_LENGTH];
+
+        char *full_leaf_name = "/testdir/testfile.txt",
+             *leaf_name = "testfile.txt",
+             *relative_leaf_name = "testdir/testfile.txt";
+        char *leaf_name1 = "testfile1.txt";
+        char *full_newparent_name = "/testdir/dir1",
+             *newparent_name = "dir1";
+        char *full_newnod_name = "/testdir/nod1",
+             *newnod_name = "nod1";
+
+        /* Initialize test area */
+        ret = glfs_mkdir (fs, full_parent_name, 0644);
+        if (ret != 0 && errno != EEXIST) {
+                fprintf (stderr, "%s: (%p) %s\n", full_parent_name, fd,
+                         strerror (errno));
+                printf ("Test initialization failed on volume %s\n", argv[1]);
+                goto out;
+        }
+        else if (ret != 0) {
+                printf ("Found test directory %s to be existing\n",
+                        full_parent_name);
+                printf ("Cleanup test directory and restart tests\n");
+                goto out;
+        }
+
+        fd = glfs_creat (fs, full_leaf_name, O_CREAT, 0644);
+        if (fd == NULL) {
+                fprintf (stderr, "%s: (%p) %s\n", full_leaf_name, fd,
+                         strerror (errno));
+                printf ("Test initialization failed on volume %s\n", argv[1]);
+                goto out;
+        }
+        glfs_close (fd);
+
+        printf ("Initialized the test area, within volume %s\n", argv[1]);
+
+        /* Handle based APIs test area */
+
+        /* glfs_lookupat test */
+        printf ("glfs_h_lookupat tests: In Progress\n");
+        /* start at root of the volume */
+        root = glfs_h_lookupat (fs, NULL, "/", &sb);
+        if (root == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         "/", NULL, strerror (errno));
+                printf ("glfs_h_lookupat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* lookup a parent within root */
+        parent = glfs_h_lookupat (fs, root, parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         parent_name, root, strerror (errno));
+                printf ("glfs_h_lookupat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* lookup a leaf/child within the parent */
+        leaf = glfs_h_lookupat (fs, parent, leaf_name, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         leaf_name, parent, strerror (errno));
+                printf ("glfs_h_lookupat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* reset */
+        glfs_h_close (root); root = NULL;
+        glfs_h_close (leaf); leaf = NULL;
+        glfs_h_close (parent); parent = NULL;
+
+        /* check absolute paths */
+        root = glfs_h_lookupat (fs, NULL, "/", &sb);
+        if (root == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         "/", NULL, strerror (errno));
+                printf ("glfs_h_lookupat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, root, strerror (errno));
+                printf ("glfs_h_lookupat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_lookupat (fs, NULL, full_leaf_name, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_leaf_name, parent, strerror (errno));
+                printf ("glfs_h_lookupat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* reset */
+        glfs_h_close (leaf); leaf = NULL;
+
+        /* check multiple component paths */
+        leaf = glfs_h_lookupat (fs, root, relative_leaf_name, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         relative_leaf_name, root, strerror (errno));
+                printf ("glfs_h_lookupat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* reset */
+        glfs_h_close (root); root = NULL;
+        glfs_h_close (parent); parent = NULL;
+
+        /* check symlinks in path */
+
+        /* TODO: -ve test cases */
+        /* parent invalid
+         * path invalid
+         * path does not exist after some components
+         * no parent, but relative path
+         * parent and full path? -ve?
+         */
+
+        printf ("glfs_h_lookupat tests: PASSED\n");
+
+        /* glfs_openat test */
+        printf ("glfs_h_open tests: In Progress\n");
+        fd = glfs_h_open (fs, leaf, O_RDWR);
+        if (fd == NULL) {
+                fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+                         full_leaf_name, strerror (errno));
+                printf ("glfs_h_open tests: FAILED\n");
+                goto out;
+        }
+
+        /* test read/write based on fd */
+        memcpy (writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+        ret = glfs_write (fd, writebuf, 32, 0);
+
+        glfs_lseek (fd, 0, SEEK_SET);
+
+        /* start from a known-empty buffer so a failed read cannot compare
+         * equal by accident */
+        memset (readbuf, 0, sizeof (readbuf));
+        ret = glfs_read (fd, readbuf, 32, 0);
+        if (memcmp (readbuf, writebuf, 32)) {
+                printf ("Failed to read what I wrote: %s %s\n", readbuf,
+                        writebuf);
+                glfs_close (fd);
+                printf ("glfs_h_open tests: FAILED\n");
+                goto out;
+        }
+
+        glfs_h_close (leaf); leaf = NULL;
+        glfs_close (fd);
+
+        printf ("glfs_h_open tests: PASSED\n");
+
+        /* Create tests */
+        printf ("glfs_h_creat tests: In Progress\n");
+        parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, root, strerror (errno));
+                printf ("glfs_h_creat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_creat (fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+                         leaf_name1, parent, strerror (errno));
+                printf ("glfs_h_creat tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        glfs_h_close (leaf); leaf = NULL;
+
+        /* an exclusive create of the same name has to be refused */
+        leaf = glfs_h_creat (fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644,
+                             &sb);
+        if (leaf != NULL || errno != EEXIST) {
+                fprintf (stderr, "glfs_h_creat: existing file, leaf = (%p), errno = %s\n",
+                         leaf, strerror (errno));
+                printf ("glfs_h_creat tests: FAILED\n");
+                /* a non-NULL leaf is closed at 'out' */
+                goto out;
+        }
+
+        /* NOTE(review): root was closed and reset to NULL above, so this
+         * call is made with a NULL parent handle. Confirm that is the case
+         * this check is meant to cover (EINVAL is accepted below). */
+        tmp = glfs_h_creat (fs, root, parent_name, O_CREAT, 0644, &sb);
+        if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+                fprintf (stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+                         tmp, strerror (errno));
+                printf ("glfs_h_creat tests: FAILED\n");
+                if (tmp != NULL) {
+                        glfs_h_close (tmp); tmp = NULL;
+                }
+                goto out;
+        }
+
+        /* TODO: Other combinations and -ve cases as applicable */
+        printf ("glfs_h_creat tests: PASSED\n");
+
+        /* extract handle and create from handle test */
+        printf ("glfs_h_extract_handle and glfs_h_create_from_handle tests: In Progress\n");
+        /* TODO: Change the lookup to creat below for a GIFD recovery falure,
+         * that needs to be fixed */
+        leaf = glfs_h_lookupat (fs, parent, leaf_name1, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         leaf_name1, parent, strerror (errno));
+                printf ("glfs_h_extract_handle tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        ret = glfs_h_extract_handle (leaf, leaf_handle,
+                                     GFAPI_HANDLE_LENGTH);
+        if (ret < 0) {
+                fprintf (stderr, "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+                         full_leaf_name, strerror (errno));
+                printf ("glfs_h_extract_handle tests: FAILED\n");
+                goto out;
+        }
+        peek_handle (leaf_handle);
+
+        glfs_h_close (leaf); leaf = NULL;
+
+        leaf = glfs_h_create_from_handle (fs, leaf_handle, GFAPI_HANDLE_LENGTH,
+                                          &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+                         leaf_name1, leaf_handle, strerror (errno));
+                printf ("glfs_h_create_from_handle tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        fd = glfs_h_open (fs, leaf, O_RDWR);
+        if (fd == NULL) {
+                fprintf (stderr, "glfs_h_open: error on open of %s: %s\n",
+                         full_leaf_name, strerror (errno));
+                printf ("glfs_h_create_from_handle tests: FAILED\n");
+                goto out;
+        }
+
+        /* test read/write based on fd */
+        memcpy (writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+        ret = glfs_write (fd, writebuf, 32, 0);
+
+        glfs_lseek (fd, 0, SEEK_SET);
+
+        /* readbuf still holds the identical payload from the open test;
+         * clear it so this comparison only passes on a real read */
+        memset (readbuf, 0, sizeof (readbuf));
+        ret = glfs_read (fd, readbuf, 32, 0);
+        if (memcmp (readbuf, writebuf, 32)) {
+                printf ("Failed to read what I wrote: %s %s\n", readbuf,
+                        writebuf);
+                printf ("glfs_h_create_from_handle tests: FAILED\n");
+                glfs_close (fd);
+                goto out;
+        }
+
+        glfs_close (fd);
+        glfs_h_close (leaf); leaf = NULL;
+        glfs_h_close (parent); parent = NULL;
+
+        printf ("glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+        /* Mkdir tests */
+        printf ("glfs_h_mkdir tests: In Progress\n");
+
+        ret = glfs_rmdir (fs, full_newparent_name);
+        if (ret && errno != ENOENT) {
+                fprintf (stderr, "glfs_rmdir: Failed for %s: %s\n",
+                         full_newparent_name, strerror (errno));
+                printf ("glfs_h_mkdir tests: FAILED\n");
+                goto out;
+        }
+
+        parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, root, strerror (errno));
+                printf ("glfs_h_mkdir tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_mkdir (fs, parent, newparent_name, 0644, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+                         newparent_name, parent, strerror (errno));
+                printf ("glfs_h_mkdir tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        glfs_h_close (leaf); leaf = NULL;
+
+        /* creating the same directory again has to be refused */
+        leaf = glfs_h_mkdir (fs, parent, newparent_name, 0644, &sb);
+        if (leaf != NULL || errno != EEXIST) {
+                fprintf (stderr, "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+                         leaf, strerror (errno));
+                printf ("glfs_h_mkdir tests: FAILED\n");
+                /* a non-NULL leaf is closed at 'out' */
+                goto out;
+        }
+
+        glfs_h_close (parent); parent = NULL;
+
+        printf ("glfs_h_mkdir tests: PASSED\n");
+
+        /* Mknod tests */
+        printf ("glfs_h_mknod tests: In Progress\n");
+        ret = glfs_unlink (fs, full_newnod_name);
+        if (ret && errno != ENOENT) {
+                fprintf (stderr, "glfs_unlink: Failed for %s: %s\n",
+                         full_newnod_name, strerror (errno));
+                printf ("glfs_h_mknod tests: FAILED\n");
+                goto out;
+        }
+
+        parent = glfs_h_lookupat (fs, NULL, full_parent_name, &sb);
+        if (parent == NULL) {
+                fprintf (stderr, "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+                         full_parent_name, root, strerror (errno));
+                printf ("glfs_h_mknod tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        leaf = glfs_h_mknod (fs, parent, newnod_name, S_IFIFO, 0, &sb);
+        if (leaf == NULL) {
+                fprintf (stderr, "glfs_h_mknod: error on mknod of %s: from (%p),%s\n",
+                         newnod_name, parent, strerror (errno));
+                printf ("glfs_h_mknod tests: FAILED\n");
+                goto out;
+        }
+        peek_stat (&sb);
+
+        /* TODO: creat op on a FIFO node hangs, need to check and fix
+        tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+        if (tmp != NULL || errno != EINVAL) {
+                fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno = %s\n",
+                        tmp, strerror (errno));
+                printf ("glfs_h_creat/mknod tests: FAILED\n");
+                if (tmp != NULL) {
+                        glfs_h_close(tmp); tmp = NULL;
+                }
+        } */
+
+        glfs_h_close (leaf); leaf = NULL;
+
+        /* creating the same node again has to be refused */
+        leaf = glfs_h_mknod (fs, parent, newnod_name, 0644, 0, &sb);
+        if (leaf != NULL || errno != EEXIST) {
+                fprintf (stderr, "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n",
+                         leaf, strerror (errno));
+                printf ("glfs_h_mknod tests: FAILED\n");
+                /* a non-NULL leaf is closed at 'out' */
+                goto out;
+        }
+
+        glfs_h_close (parent); parent = NULL;
+
+        printf ("glfs_h_mknod tests: PASSED\n");
+
+        /* unlink tests */
+        test_h_unlink ();
+
+        /* TODO: opendir tests */
+
+        /* getattr tests */
+        test_h_getsetattrs ();
+
+        /* TODO: setattr tests */
+
+        /* truncate tests */
+        test_h_truncate();
+
+        /* link tests */
+        test_h_links ();
+
+        /* rename tests */
+        test_h_rename ();
+
+        /* performance tests */
+        test_h_performance ();
+
+        /* END: New APIs test area */
+
+out:
+        /* Cleanup glfs handles */
+        if (root)
+                glfs_h_close (root);
+        if (parent)
+                glfs_h_close (parent);
+        if (leaf)
+                glfs_h_close (leaf);
+
+        return ret;
+}
+
+/*
+ * Example driver, invoked as: <program> <volname> <hostname>.
+ *
+ * Brings up two glfs instances on the same volume (the file-scope 'fs' and
+ * a local 'fs2'), writes through one and reads back through the other,
+ * runs a few path based calls (mknod, lstat, rename, unlink, mkdir, rmdir)
+ * and then the dirops, xattr, chdir and handle-op test groups. Progress is
+ * logged on stderr/stdout.
+ *
+ * Returns -1 on bad usage, 1 when an instance cannot be created or
+ * initialised, otherwise the return value of the final glfs_rmdir() call.
+ */
+int
+main (int argc, char *argv[])
+{
+        glfs_t    *fs2 = NULL;
+        int        ret = 0;
+        glfs_fd_t *fd = NULL;
+        glfs_fd_t *fd2 = NULL;
+        struct stat sb = {0, };
+        /* readbuf keeps one spare zero byte so it can be printed with %s;
+         * writebuf is zeroed because all 32 bytes of it are written out */
+        char       readbuf[32 + 1] = {0, };
+        char       writebuf[32] = {0, };
+
+        char *filename = "/filename2";
+
+        if (argc != 3) {
+                printf ("Expect following args\n\t%s <volname> <hostname>\n", argv[0]);
+                return -1;
+        }
+
+        fs = glfs_new (argv[1]);
+        if (!fs) {
+                fprintf (stderr, "glfs_new: returned NULL\n");
+                return 1;
+        }
+
+//      ret = glfs_set_volfile (fs, "/tmp/posix.vol");
+
+        ret = glfs_set_volfile_server (fs, "tcp", argv[2], 24007);
+
+//      ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+
+        ret = glfs_set_logging (fs, "/dev/stderr", 7);
+
+        ret = glfs_init (fs);
+
+        fprintf (stderr, "glfs_init: returned %d\n", ret);
+        if (ret != 0) {
+                /* nothing below can work without an initialised instance */
+                return 1;
+        }
+
+        sleep (2);
+
+        fs2 = glfs_new (argv[1]);
+        if (!fs2) {
+                fprintf (stderr, "glfs_new: returned NULL\n");
+                return 1;
+        }
+
+
+//      ret = glfs_set_volfile (fs2, "/tmp/posix.vol");
+
+        ret = glfs_set_volfile_server (fs2, "tcp", argv[2], 24007);
+
+        ret = glfs_set_logging (fs2, "/dev/stderr", 7);
+
+        ret = glfs_init (fs2);
+
+        fprintf (stderr, "glfs_init: returned %d\n", ret);
+        if (ret != 0) {
+                return 1;
+        }
+
+        ret = glfs_lstat (fs, filename, &sb);
+        fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+        fd = glfs_creat (fs, filename, O_RDWR, 0644);
+        fprintf (stderr, "%s: (%p) %s\n", filename, fd, strerror (errno));
+
+        fd2 = glfs_open (fs2, filename, O_RDWR);
+        fprintf (stderr, "%s: (%p) %s\n", filename, fd2, strerror (errno));
+
+        /* only exercise the descriptors when both were obtained */
+        if (fd != NULL && fd2 != NULL) {
+                sprintf (writebuf, "hi there\n");
+                ret = glfs_write (fd, writebuf, 32, 0);
+
+                glfs_lseek (fd2, 0, SEEK_SET);
+
+                ret = glfs_read (fd2, readbuf, 32, 0);
+
+                printf ("read %d, %s", ret, readbuf);
+        }
+
+        if (fd != NULL)
+                glfs_close (fd);
+        if (fd2 != NULL)
+                glfs_close (fd2);
+
+        filename = "/filename3";
+        ret = glfs_mknod (fs, filename, S_IFIFO, 0);
+        fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+        ret = glfs_lstat (fs, filename, &sb);
+        fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+
+        ret = glfs_rename (fs, filename, "/filename4");
+        fprintf (stderr, "rename(%s): (%d) %s\n", filename, ret,
+                 strerror (errno));
+
+        ret = glfs_unlink (fs, "/filename4");
+        fprintf (stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+                 strerror (errno));
+
+        filename = "/dirname2";
+        ret = glfs_mkdir (fs, filename, 0);
+        fprintf (stderr, "%s: (%d) %s\n", filename, ret, strerror (errno));
+
+        ret = glfs_lstat (fs, filename, &sb);
+        fprintf (stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror (errno));
+
+        ret = glfs_rmdir (fs, filename);
+        fprintf (stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror (errno));
+
+        test_dirops (fs);
+
+        test_xattr (fs);
+
+        test_chdir (fs);
+
+        test_handleops (argc, argv);
+        // done
+
+        glfs_fini (fs);
+        glfs_fini (fs2);
+
+        return ret;
+}
diff --git a/api/examples/setup.py.in b/api/examples/setup.py.in
new file mode 100644
index 000000000..44b738094
--- /dev/null
+++ b/api/examples/setup.py.in
@@ -0,0 +1,29 @@
+from distutils.core import setup
+
+# Generate an __init__.py for the package namespace. try/finally makes sure
+# the file is closed even when the write raises.
+fo = open('__init__.py', 'w')
+try:
+    fo.write('__version__ = "@PACKAGE_VERSION@"\n')
+finally:
+    fo.close()
+
+# Long description handed to setup() below.
+DESC = """GlusterFS is a clustered file-system capable of scaling to
+several petabytes. It aggregates various storage bricks over Infiniband
+RDMA or TCP/IP interconnect into one large parallel network file system.
+GlusterFS is one of the most sophisticated file systems in terms of
+features and extensibility. It borrows a powerful concept called
+Translators from GNU Hurd kernel. Much of the code in GlusterFS is in
+user space and easily manageable.
+
+This package contains the Python interface to the libgfapi library."""
+
+# The 'gluster' package is mapped onto the directory this script runs in
+# (package_dir maps it to ''), which is where __init__.py was just written.
+setup(
+    name='glusterfs-api',
+    version='@PACKAGE_VERSION@',
+    description='Python client library for the GlusterFS libgfapi',
+    long_description=DESC,
+    author='Gluster Community',
+    author_email='gluster-devel@nongnu.org',
+    license='LGPLv3',
+    url='http://gluster.org/',
+    package_dir={'gluster':''},
+    packages=['gluster']
+)
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
new file mode 100644
index 000000000..7c5df3e20
--- /dev/null
+++ b/api/src/Makefile.am
@@ -0,0 +1,36 @@
+# libgfapi.la is the installed libtool library; glfs.h and glfs-handles.h
+# are installed under $(includedir)/glusterfs/api, the two noinst headers
+# are distributed but not installed.
+lib_LTLIBRARIES = libgfapi.la
+noinst_HEADERS = glfs-mem-types.h glfs-internal.h
+libgfapi_HEADERS = glfs.h glfs-handles.h
+libgfapidir = $(includedir)/glusterfs/api
+
+# Sources and link-time dependencies of libgfapi.la.
+libgfapi_la_SOURCES = glfs.c glfs-mgmt.c glfs-fops.c glfs-resolve.c \
+ glfs-handleops.c
+libgfapi_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(GF_LDADD)
+
+# Per-target preprocessor flags for libgfapi.la; as per-target flags they
+# are used for this target in place of AM_CPPFLAGS.
+libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src
+
+
+# api.la is installed into the versioned xlator/mount directory.
+xlator_LTLIBRARIES = api.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
+# workaround for broken parallel install support in automake with LTLIBRARIES
+# http://debbugs.gnu.org/cgi/bugreport.cgi?bug=7328
+install_xlatorLTLIBRARIES = install-xlatorLTLIBRARIES
+$(install_xlatorLTLIBRARIES): install-libLTLIBRARIES
+
+# api.la is built from glfs-master.c as a libtool module without a version
+# suffix (-module -avoid-version) and is linked against libgfapi.la.
+api_la_SOURCES = glfs-master.c
+api_la_DEPENDENCIES = libgfapi.la
+api_la_LDFLAGS = -module -avoid-version
+api_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/api/src/libgfapi.la
+
+# Directory-wide defaults for targets that set no per-target flags.
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
new file mode 100644
index 000000000..10bb7d38b
--- /dev/null
+++ b/api/src/glfs-fops.c
@@ -0,0 +1,3252 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "syncop.h"
+#include "glfs.h"
+#include <limits.h>
+/* Longest entry name handled here: NAME_MAX when <limits.h> defines it,
+ * otherwise 255. */
+#ifdef NAME_MAX
+#define GF_NAME_MAX NAME_MAX
+#else
+#define GF_NAME_MAX 255
+#endif
+/* Size of one struct dirent plus GF_NAME_MAX + 1 extra bytes for the name. */
+#define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
+
+/*
+ * Link loc->inode under (loc->parent, loc->name) with the attributes in
+ * @iatt, via inode_link().
+ *
+ * Returns 0 on success. Returns -1 with errno EINVAL when @loc carries no
+ * inode, or with errno ENOMEM when inode_link() yields NULL.
+ */
+int
+glfs_loc_link (loc_t *loc, struct iatt *iatt)
+{
+        inode_t *inode = NULL;
+
+        if (loc->inode == NULL) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        inode = inode_link (loc->inode, loc->parent, loc->name, iatt);
+        if (inode == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        /* register the lookup, then drop the reference inode_link() gave us */
+        inode_lookup (inode);
+        inode_unref (inode);
+
+        return 0;
+}
+
+/*
+ * Fill @stat from @iatt using iatt_to_stat(), then overwrite st_dev with
+ * the device id stored in @fs.
+ */
+void
+glfs_iatt_to_stat (struct glfs *fs, struct iatt *iatt, struct stat *stat)
+{
+ iatt_to_stat (iatt, stat);
+ stat->st_dev = fs->dev_id;
+}
+
+/*
+ * Call inode_unlink() with the inode, parent and name held in @loc.
+ * Always returns 0.
+ */
+int
+glfs_loc_unlink (loc_t *loc)
+{
+ inode_unlink (loc->inode, loc->parent, loc->name);
+
+ return 0;
+}
+
+/*
+ * Open the existing regular file at @path on @fs with the given @flags.
+ *
+ * Returns a new glfs_fd on success and NULL on failure. errno is set here
+ * to EIO (no active subvolume), EISDIR (path is a directory), EINVAL (not
+ * a regular file) or ENOMEM (fd_create failed); other failures come from
+ * glfs_resolve() or syncop_open(). ESTALE results are retried through
+ * ESTALE_RETRY.
+ */
+struct glfs_fd *
+glfs_open (struct glfs *fs, const char *path, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ if (ret && glfd) { /* failure after glfd was allocated: discard it */
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) { /* success: record flags and bind fd and glfd */
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+/*
+ * Flush and release @glfd. syncop_flush() is issued only when an active
+ * subvolume and a resolved fd are available; @glfd is passed to
+ * glfs_fd_destroy() in every case.
+ *
+ * Returns the syncop_flush() result, or -1 with errno EIO (no active
+ * subvolume) or EBADFD (fd could not be resolved).
+ */
+int
+glfs_close (struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct glfs *fs = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_flush (subvol, fd);
+out:
+ fs = glfd->fs; /* saved first: glfd is destroyed on the next line */
+ glfs_fd_destroy (glfd);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Resolve @path with glfs_lresolve() and, when that returns 0 and @stat is
+ * non-NULL, fill @stat from the resolved attributes.
+ *
+ * Returns the glfs_lresolve() result, or -1 with errno EIO when there is
+ * no active subvolume. ESTALE results are retried through ESTALE_RETRY.
+ */
+int
+glfs_lstat (struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Resolve @path with glfs_resolve() and, when that returns 0 and @stat is
+ * non-NULL, fill @stat from the resolved attributes.
+ *
+ * Returns the glfs_resolve() result, or -1 with errno EIO when there is
+ * no active subvolume. ESTALE results are retried through ESTALE_RETRY.
+ */
+int
+glfs_stat (struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Fetch the attributes of the open descriptor @glfd with syncop_fstat()
+ * and, when that returns 0 and @stat is non-NULL, convert them into @stat.
+ *
+ * Returns the syncop_fstat() result, or -1 with errno EIO (no active
+ * subvolume) or EBADFD (fd could not be resolved).
+ */
+int
+glfs_fstat (struct glfs_fd *glfd, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct iatt iatt = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fstat (subvol, fd, &iatt);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat (glfd->fs, &iatt, stat);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Open @path on @fs, creating it with @mode when it does not exist.
+ *
+ * If the path resolves, it is opened with syncop_open(); O_EXCL in @flags
+ * then fails with EEXIST, a directory with EISDIR and any other
+ * non-regular file with EINVAL. If the path is missing but its parent
+ * resolved, a new inode is created with syncop_create(), passing a freshly
+ * generated gfid in the "gfid-req" request xattr.
+ *
+ * Returns a new glfs_fd on success and NULL on failure (errno EIO when
+ * there is no active subvolume, ENOMEM on allocation failures, otherwise
+ * as left by the failing call).
+ */
+struct glfs_fd *
+glfs_creat (struct glfs *fs, const char *path, int flags, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ /* This must be glfs_resolve() and NOT glfs_lresolve().
+ That is because open("name", O_CREAT) where "name"
+ is a dangling symlink must create the dangling
+ destination.
+ */
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ if (loc.inode) {
+ if (flags & O_EXCL) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (IA_ISDIR (iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG (iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (ret == -1 && errno == ENOENT) { /* missing entry: allocate the inode to create */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref (glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (ret == 0) { /* path resolved: open the existing file */
+ ret = syncop_open (subvol, &loc, flags, glfd->fd);
+ } else { /* ENOENT with a valid parent: create it */
+ ret = syncop_create (subvol, &loc, flags, mode, glfd->fd,
+ xattr_req, &iatt);
+ }
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd->fd->flags = flags;
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+
+/*
+ * Reposition the offset stored in @glfd according to @whence:
+ *   SEEK_SET - absolute @offset
+ *   SEEK_CUR - relative to the current offset
+ *   SEEK_END - relative to the file size reported by glfs_fstat()
+ *
+ * Returns the new offset on success. Returns -1 and leaves the stored
+ * offset untouched when glfs_fstat() fails for SEEK_END (errno as set by
+ * glfs_fstat()) or when @whence is not one of the values above (errno
+ * EINVAL). Previously both cases silently returned the old offset, which
+ * a caller could not tell apart from a successful seek.
+ */
+off_t
+glfs_lseek (struct glfs_fd *glfd, off_t offset, int whence)
+{
+        struct stat     sb = {0, };
+        int             ret = -1;
+
+        __glfs_entry_fd (glfd);
+
+        switch (whence) {
+        case SEEK_SET:
+                glfd->offset = offset;
+                break;
+        case SEEK_CUR:
+                glfd->offset += offset;
+                break;
+        case SEEK_END:
+                ret = glfs_fstat (glfd, &sb);
+                if (ret) {
+                        /* size unknown: report the failure to the caller */
+                        return -1;
+                }
+                glfd->offset = sb.st_size + offset;
+                break;
+        default:
+                errno = EINVAL;
+                return -1;
+        }
+
+        return glfd->offset;
+}
+
+
+//////////////
+/*
+ * Read from @glfd at @offset into the buffers described by @iovec/@iovcnt.
+ * The request size is the total length of @iovec. On a positive read the
+ * data is copied into @iovec with iov_copy() and glfd->offset is set to
+ * @offset plus the number of bytes copied.
+ *
+ * Returns the number of bytes copied; the syncop_readv() result when that
+ * is <= 0; or -1 with errno EIO (no active subvolume) or EBADFD (fd could
+ * not be resolved). @flags is not forwarded: 0 is passed to syncop_readv().
+ */
+ssize_t
+glfs_preadv (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ xlator_t *subvol = NULL;
+ ssize_t ret = -1;
+ ssize_t size = -1;
+ struct iovec *iov = NULL;
+ int cnt = 0;
+ struct iobref *iobref = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length (iovec, iovcnt);
+
+ ret = syncop_readv (subvol, fd, size, offset, 0, &iov, &cnt, &iobref);
+ if (ret <= 0)
+ goto out;
+
+ size = iov_copy (iovec, iovcnt, iov, cnt); /* FIXME!!! */
+
+ glfd->offset = (offset + size);
+
+ ret = size;
+out:
+ if (iov)
+ GF_FREE (iov);
+ if (iobref)
+ iobref_unref (iobref);
+
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+/*
+ * Read up to @count bytes into @buf starting at the descriptor's current
+ * offset. Wraps @buf in a single iovec and delegates to glfs_preadv().
+ */
+ssize_t
+glfs_read (struct glfs_fd *glfd, void *buf, size_t count, int flags)
+{
+        struct iovec vec = {0, };
+
+        vec.iov_base = buf;
+        vec.iov_len  = count;
+
+        return glfs_preadv (glfd, &vec, 1, glfd->offset, flags);
+}
+
+
+/*
+ * Read up to @count bytes into @buf starting at the explicit @offset.
+ * Wraps @buf in a single iovec and delegates to glfs_preadv().
+ */
+ssize_t
+glfs_pread (struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+            int flags)
+{
+        struct iovec vec = {0, };
+
+        vec.iov_base = buf;
+        vec.iov_len  = count;
+
+        return glfs_preadv (glfd, &vec, 1, offset, flags);
+}
+
+
+/*
+ * Vectored read starting at the descriptor's current offset; delegates to
+ * glfs_preadv() with glfd->offset.
+ */
+ssize_t
+glfs_readv (struct glfs_fd *glfd, const struct iovec *iov, int count,
+            int flags)
+{
+        return glfs_preadv (glfd, iov, count, glfd->offset, flags);
+}
+
+/* Bookkeeping for one asynchronous request issued by the *_async entry
+ * points in this file; allocated by them and freed once it completes. */
+struct glfs_io {
+ struct glfs_fd *glfd; /* descriptor the request operates on */
+ int op; /* GF_FOP_* code selecting the operation */
+ off_t offset; /* file offset (new length for ftruncate) */
+ struct iovec *iov; /* iov_dup() copy of the caller's iovec; NULL when unused */
+ int count; /* iovec entries, or byte length for discard/zerofill */
+ int flags; /* I/O flags; non-zero means data-only for fsync */
+ glfs_io_cbk fn; /* completion callback */
+ void *data; /* opaque argument handed back to fn */
+};
+
+
+/*
+ * Completion handler passed to synctask_new() by the *_async entry
+ * points: hands @ret to the user's callback, then frees the request
+ * carried in @data. Always returns 0.
+ */
+static int
+glfs_io_async_cbk (int ret, call_frame_t *frame, void *data)
+{
+        struct glfs_io *req = data;
+
+        /* notify the caller before the request memory goes away */
+        (*req->fn) (req->glfd, ret, req->data);
+
+        GF_FREE (req->iov);
+        GF_FREE (req);
+
+        return 0;
+}
+
+
+/*
+ * Task body passed to synctask_new() by the *_async entry points: runs
+ * the synchronous call matching the request's op code and returns its
+ * result truncated to int. Unknown op codes return 0.
+ */
+static int
+glfs_io_async_task (void *data)
+{
+        struct glfs_io *req = data;
+        ssize_t         result = 0;
+
+        if (req->op == GF_FOP_WRITE) {
+                result = glfs_pwritev (req->glfd, req->iov, req->count,
+                                       req->offset, req->flags);
+        } else if (req->op == GF_FOP_FTRUNCATE) {
+                result = glfs_ftruncate (req->glfd, req->offset);
+        } else if (req->op == GF_FOP_FSYNC) {
+                /* a non-zero flags value selects the data-only variant */
+                if (req->flags)
+                        result = glfs_fdatasync (req->glfd);
+                else
+                        result = glfs_fsync (req->glfd);
+        } else if (req->op == GF_FOP_DISCARD) {
+                result = glfs_discard (req->glfd, req->offset, req->count);
+        } else if (req->op == GF_FOP_ZEROFILL) {
+                result = glfs_zerofill(req->glfd, req->offset, req->count);
+        }
+
+        return (int) result;
+}
+
+/*
+ * readv callback wound by glfs_preadv_async(). @cookie carries the
+ * subvolume and frame->local the request. On a positive @op_ret the data
+ * is copied into the caller's iovec and glfd->offset is advanced; the user
+ * callback is then invoked with errno set to @op_errno, after which the
+ * request, the call stack and the subvolume reference are released.
+ * Always returns 0.
+ */
+int
+glfs_preadv_async_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iovec *iovec,
+ int count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ struct glfs_io *gio = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs *fs = NULL;
+ struct glfs_fd *glfd = NULL;
+
+
+ gio = frame->local;
+ frame->local = NULL;
+ subvol = cookie;
+ glfd = gio->glfd;
+ fs = glfd->fs;
+
+ if (op_ret <= 0)
+ goto out;
+
+ op_ret = iov_copy (gio->iov, gio->count, iovec, count);
+
+ glfd->offset = gio->offset + op_ret;
+out:
+ errno = op_errno;
+ gio->fn (gio->glfd, op_ret, gio->data);
+
+ GF_FREE (gio->iov);
+ GF_FREE (gio);
+ STACK_DESTROY (frame->root);
+ glfs_subvol_done (fs, subvol);
+
+ return 0;
+}
+
+
+/*
+ * Start an asynchronous vectored read of @glfd at @offset into
+ * @iovec/@count. On completion glfs_preadv_async_cbk() copies the data and
+ * invokes @fn with @data.
+ *
+ * Returns 0 once the read has been wound. Returns -1 with errno EIO (no
+ * active subvolume), EBADFD (fd could not be resolved) or ENOMEM (frame,
+ * request or iovec copy could not be allocated).
+ *
+ * The error path releases only what was actually acquired: earlier
+ * failures reached the cleanup with gio and/or frame still NULL and
+ * dereferenced them, and fs was not yet set when passed on.
+ */
+int
+glfs_preadv_async (struct glfs_fd *glfd, const struct iovec *iovec, int count,
+                   off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+        struct glfs_io *gio = NULL;
+        int             ret = 0;
+        call_frame_t   *frame = NULL;
+        xlator_t       *subvol = NULL;
+        glfs_t         *fs = NULL;
+        fd_t           *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* captured up front so every error path below can use it */
+        fs = glfd->fs;
+
+        subvol = glfs_active_subvol (fs);
+        if (!subvol) {
+                ret = -1;
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (fs, subvol, glfd);
+        if (!fd) {
+                ret = -1;
+                errno = EBADFD;
+                goto out;
+        }
+
+        frame = syncop_create_frame (THIS);
+        if (!frame) {
+                ret = -1;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        gio = GF_CALLOC (1, sizeof (*gio), glfs_mt_glfs_io_t);
+        if (!gio) {
+                ret = -1;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        gio->iov = iov_dup (iovec, count);
+        if (!gio->iov) {
+                ret = -1;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        gio->op     = GF_FOP_READ;
+        gio->glfd   = glfd;
+        gio->count  = count;
+        gio->offset = offset;
+        gio->flags  = flags;
+        gio->fn     = fn;
+        gio->data   = data;
+
+        frame->local = gio;
+
+        /* from here on the callback owns gio, frame and the subvol ref */
+        STACK_WIND_COOKIE (frame, glfs_preadv_async_cbk, subvol, subvol,
+                           subvol->fops->readv, fd, iov_length (iovec, count),
+                           offset, flags, NULL);
+
+out:
+        if (ret) {
+                if (gio) {
+                        GF_FREE (gio->iov);
+                        GF_FREE (gio);
+                }
+                if (frame)
+                        STACK_DESTROY (frame->root);
+                glfs_subvol_done (fs, subvol);
+        }
+
+        if (fd)
+                fd_unref (fd);
+
+        return ret;
+}
+
+
+/*
+ * Asynchronous read of up to @count bytes into @buf from the descriptor's
+ * current offset; wraps @buf in one iovec for glfs_preadv_async().
+ */
+int
+glfs_read_async (struct glfs_fd *glfd, void *buf, size_t count, int flags,
+                 glfs_io_cbk fn, void *data)
+{
+        struct iovec vec = {0, };
+
+        vec.iov_base = buf;
+        vec.iov_len  = count;
+
+        return glfs_preadv_async (glfd, &vec, 1, glfd->offset, flags,
+                                  fn, data);
+}
+
+
+/*
+ * Asynchronous read of up to @count bytes into @buf from the explicit
+ * @offset; wraps @buf in one iovec for glfs_preadv_async().
+ */
+int
+glfs_pread_async (struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+                  int flags, glfs_io_cbk fn, void *data)
+{
+        struct iovec vec = {0, };
+
+        vec.iov_base = buf;
+        vec.iov_len  = count;
+
+        return glfs_preadv_async (glfd, &vec, 1, offset, flags, fn, data);
+}
+
+
+/*
+ * Asynchronous vectored read from the descriptor's current offset;
+ * delegates to glfs_preadv_async() with glfd->offset.
+ */
+int
+glfs_readv_async (struct glfs_fd *glfd, const struct iovec *iov, int count,
+                  int flags, glfs_io_cbk fn, void *data)
+{
+        return glfs_preadv_async (glfd, iov, count, glfd->offset, flags,
+                                  fn, data);
+}
+
+///// writev /////
+/*
+ * Write the buffers described by @iovec/@iovcnt to @glfd at @offset. The
+ * data is first gathered into one iobuf of the total iovec length and then
+ * sent with syncop_writev().
+ *
+ * Returns the syncop_writev() result, or -1 with errno EIO (no active
+ * subvolume), EBADFD (fd could not be resolved) or ENOMEM (iobuf/iobref
+ * allocation failed).
+ *
+ * NOTE(review): on a positive result glfd->offset advances by the
+ * requested size, not by the returned count -- confirm this is intended
+ * for short writes. The result is also carried in an int although the
+ * function returns ssize_t.
+ */
+ssize_t
+glfs_pwritev (struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ size_t size = -1;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {0, };
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length (iovec, iovcnt);
+
+ iobuf = iobuf_get2 (subvol->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ iobuf_unref (iobuf);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = iobref_add (iobref, iobuf);
+ if (ret) {
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ iov_unload (iobuf_ptr (iobuf), iovec, iovcnt); /* FIXME!!! */
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = size;
+
+ ret = syncop_writev (subvol, fd, &iov, 1, offset, iobref, flags);
+
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+
+ if (ret <= 0)
+ goto out;
+
+ glfd->offset = (offset + size);
+
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+/*
+ * Write @count bytes from @buf at the descriptor's current offset. Wraps
+ * @buf in a single iovec and delegates to glfs_pwritev().
+ */
+ssize_t
+glfs_write (struct glfs_fd *glfd, const void *buf, size_t count, int flags)
+{
+        struct iovec vec = {0, };
+
+        /* iovec has no const base pointer; the data is only read */
+        vec.iov_base = (void *) buf;
+        vec.iov_len  = count;
+
+        return glfs_pwritev (glfd, &vec, 1, glfd->offset, flags);
+}
+
+
+
+/*
+ * Vectored write at the descriptor's current offset; delegates to
+ * glfs_pwritev() with glfd->offset.
+ */
+ssize_t
+glfs_writev (struct glfs_fd *glfd, const struct iovec *iov, int count,
+             int flags)
+{
+        return glfs_pwritev (glfd, iov, count, glfd->offset, flags);
+}
+
+
+/*
+ * Write @count bytes from @buf at the explicit @offset. Wraps @buf in a
+ * single iovec and delegates to glfs_pwritev().
+ */
+ssize_t
+glfs_pwrite (struct glfs_fd *glfd, const void *buf, size_t count, off_t offset,
+             int flags)
+{
+        struct iovec vec = {0, };
+
+        /* iovec has no const base pointer; the data is only read */
+        vec.iov_base = (void *) buf;
+        vec.iov_len  = count;
+
+        return glfs_pwritev (glfd, &vec, 1, offset, flags);
+}
+
+
+/*
+ * Queue an asynchronous vectored write of @iovec/@count to @glfd at
+ * @offset. The iovec array is duplicated with iov_dup(); the write itself
+ * runs in a synctask and @fn is invoked with @data when it finishes.
+ *
+ * Returns the synctask_new() result (0 is treated as success), or -1 with
+ * errno ENOMEM when the request cannot be allocated.
+ */
+int
+glfs_pwritev_async (struct glfs_fd *glfd, const struct iovec *iovec, int count,
+                    off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+        struct glfs_io *req = NULL;
+        int             err = 0;
+
+        req = GF_CALLOC (1, sizeof (*req), glfs_mt_glfs_io_t);
+        if (req == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        req->iov = iov_dup (iovec, count);
+        if (req->iov == NULL) {
+                GF_FREE (req);
+                errno = ENOMEM;
+                return -1;
+        }
+
+        req->glfd   = glfd;
+        req->op     = GF_FOP_WRITE;
+        req->offset = offset;
+        req->count  = count;
+        req->flags  = flags;
+        req->fn     = fn;
+        req->data   = data;
+
+        err = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+                            glfs_io_async_task, glfs_io_async_cbk,
+                            NULL, req);
+        if (err == 0)
+                return 0;
+
+        /* non-zero result: release the request here */
+        GF_FREE (req->iov);
+        GF_FREE (req);
+
+        return err;
+}
+
+
+/*
+ * Asynchronous write of @count bytes from @buf at the descriptor's
+ * current offset; wraps @buf in one iovec for glfs_pwritev_async().
+ */
+int
+glfs_write_async (struct glfs_fd *glfd, const void *buf, size_t count, int flags,
+                  glfs_io_cbk fn, void *data)
+{
+        struct iovec vec = {0, };
+
+        vec.iov_base = (void *) buf;
+        vec.iov_len  = count;
+
+        return glfs_pwritev_async (glfd, &vec, 1, glfd->offset, flags,
+                                   fn, data);
+}
+
+
+/*
+ * Asynchronous write of @count bytes from @buf at the explicit @offset;
+ * wraps @buf in one iovec for glfs_pwritev_async().
+ */
+int
+glfs_pwrite_async (struct glfs_fd *glfd, const void *buf, int count,
+                   off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+        struct iovec vec = {0, };
+
+        vec.iov_base = (void *) buf;
+        vec.iov_len  = count;
+
+        return glfs_pwritev_async (glfd, &vec, 1, offset, flags, fn, data);
+}
+
+
+/*
+ * Asynchronous vectored write at the descriptor's current offset;
+ * delegates to glfs_pwritev_async() with glfd->offset.
+ */
+int
+glfs_writev_async (struct glfs_fd *glfd, const struct iovec *iov, int count,
+                   int flags, glfs_io_cbk fn, void *data)
+{
+        return glfs_pwritev_async (glfd, iov, count, glfd->offset, flags,
+                                   fn, data);
+}
+
+/*
+ * Issue syncop_fsync() on @glfd with the third argument 0 (the data-only
+ * variant, glfs_fdatasync(), passes 1).
+ *
+ * Returns the syncop_fsync() result, or -1 with errno EIO (no active
+ * subvolume) or EBADFD (fd could not be resolved).
+ */
+int
+glfs_fsync (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsync (subvol, fd, 0);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+/*
+ * Shared implementation of glfs_fsync_async() and glfs_fdatasync_async():
+ * queues a GF_FOP_FSYNC request whose flags field carries @dataonly, and
+ * arranges for @fn to be called with @data on completion.
+ *
+ * Returns the synctask_new() result (0 is treated as success), or -1 with
+ * errno ENOMEM when the request cannot be allocated.
+ */
+static int
+glfs_fsync_async_common (struct glfs_fd *glfd, glfs_io_cbk fn, void *data,
+                         int dataonly)
+{
+        struct glfs_io *req = NULL;
+        int             err = 0;
+
+        req = GF_CALLOC (1, sizeof (*req), glfs_mt_glfs_io_t);
+        if (req == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        req->glfd  = glfd;
+        req->op    = GF_FOP_FSYNC;
+        req->flags = dataonly;
+        req->fn    = fn;
+        req->data  = data;
+
+        err = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+                            glfs_io_async_task, glfs_io_async_cbk,
+                            NULL, req);
+        if (err == 0)
+                return 0;
+
+        /* non-zero result: release the request here */
+        GF_FREE (req->iov);
+        GF_FREE (req);
+
+        return err;
+}
+
+/* Asynchronous glfs_fsync(): queues the request with dataonly = 0. */
+int
+glfs_fsync_async (struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ return glfs_fsync_async_common (glfd, fn, data, 0);
+}
+
+/*
+ * Issue syncop_fsync() on @glfd with the third argument 1 (glfs_fsync()
+ * passes 0).
+ *
+ * Returns the syncop_fsync() result, or -1 with errno EIO (no active
+ * subvolume) or EBADFD (fd could not be resolved).
+ */
+int
+glfs_fdatasync (struct glfs_fd *glfd)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fsync (subvol, fd, 1);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+/* Asynchronous glfs_fdatasync(): queues the request with dataonly = 1. */
+int
+glfs_fdatasync_async (struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ return glfs_fsync_async_common (glfd, fn, data, 1);
+}
+
+/*
+ * Issue syncop_ftruncate() on @glfd with @offset as the new length.
+ *
+ * Returns the syncop_ftruncate() result, or -1 with errno EIO (no active
+ * subvolume) or EBADFD (fd could not be resolved).
+ */
+int
+glfs_ftruncate (struct glfs_fd *glfd, off_t offset)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ __glfs_entry_fd (glfd);
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_ftruncate (subvol, fd, offset);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+/*
+ * Queue an asynchronous glfs_ftruncate() of @glfd to @offset; @fn is
+ * called with @data when the task completes.
+ *
+ * Returns the synctask_new() result (0 is treated as success), or -1 with
+ * errno ENOMEM when the request cannot be allocated.
+ */
+int
+glfs_ftruncate_async (struct glfs_fd *glfd, off_t offset,
+                      glfs_io_cbk fn, void *data)
+{
+        struct glfs_io *req = NULL;
+        int             err = 0;
+
+        req = GF_CALLOC (1, sizeof (*req), glfs_mt_glfs_io_t);
+        if (req == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        req->glfd   = glfd;
+        req->op     = GF_FOP_FTRUNCATE;
+        req->offset = offset;
+        req->fn     = fn;
+        req->data   = data;
+
+        err = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+                            glfs_io_async_task, glfs_io_async_cbk,
+                            NULL, req);
+        if (err == 0)
+                return 0;
+
+        /* non-zero result: release the request here */
+        GF_FREE (req->iov);
+        GF_FREE (req);
+
+        return err;
+}
+
+/*
+ * Resolve @path with glfs_resolve() and check access @mode on it with
+ * syncop_access().
+ *
+ * Returns the syncop_access() result, the non-zero glfs_resolve() result
+ * when resolution fails, or -1 with errno EIO when there is no active
+ * subvolume. ESTALE results are retried through ESTALE_RETRY.
+ */
+int
+glfs_access (struct glfs *fs, const char *path, int mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_access (subvol, &loc, mode);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Create a symbolic link at @path with contents @data via
+ * syncop_symlink(), passing a freshly generated gfid in the "gfid-req"
+ * request xattr, and link the new inode on success.
+ *
+ * Returns 0 on success and -1 on failure: errno EIO (no active
+ * subvolume), ENOMEM (allocation failures), EEXIST (@path already
+ * resolves to an inode), otherwise as left by the failing call.
+ */
+int
+glfs_symlink (struct glfs *fs, const char *data, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Read the contents of the symbolic link at @path into @buf. @bufsiz is
+ * passed to syncop_readlink() as the size; when that returns a positive
+ * count, that many bytes are copied into @buf (presumably at most @bufsiz
+ * -- TODO confirm). This function does not append a terminating NUL.
+ *
+ * Returns the syncop_readlink() result, or -1 with errno EIO (no active
+ * subvolume) or EINVAL (@path is not a symlink); resolution failures
+ * return the glfs_lresolve() result.
+ */
+int
+glfs_readlink (struct glfs *fs, const char *path, char *buf, size_t bufsiz)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+ char *linkval = NULL;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFLNK) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = syncop_readlink (subvol, &loc, &linkval, bufsiz);
+ if (ret > 0) {
+ memcpy (buf, linkval, ret);
+ GF_FREE (linkval);
+ }
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Create a node at @path with @mode and @dev via syncop_mknod(), passing
+ * a freshly generated gfid in the "gfid-req" request xattr, and link the
+ * new inode on success.
+ *
+ * Returns 0 on success and -1 on failure: errno EIO (no active
+ * subvolume), ENOMEM (allocation failures), EEXIST (@path already
+ * resolves to an inode), otherwise as left by the failing call.
+ */
+int
+glfs_mknod (struct glfs *fs, const char *path, mode_t mode, dev_t dev)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Create a directory at @path with @mode via syncop_mkdir(), passing a
+ * freshly generated gfid in the "gfid-req" request xattr, and link the
+ * new inode on success.
+ *
+ * Returns 0 on success and -1 on failure: errno EIO (no active
+ * subvolume), ENOMEM (allocation failures), EEXIST (@path already
+ * resolves to an inode), otherwise as left by the failing call.
+ */
+int
+glfs_mkdir (struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new (loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link (&loc, &iatt);
+out:
+ loc_wipe (&loc);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Remove the non-directory entry at @path via syncop_unlink() and, on
+ * success, unlink the inode locally with glfs_loc_unlink().
+ *
+ * Returns 0 on success and -1 on failure: errno EIO (no active
+ * subvolume) or EISDIR (@path is a directory), otherwise as left by
+ * glfs_lresolve() or syncop_unlink().
+ */
+int
+glfs_unlink (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ ret = syncop_unlink (subvol, &loc);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Remove the directory at @path via syncop_rmdir() and, on success,
+ * unlink the inode locally with glfs_loc_unlink().
+ *
+ * Returns 0 on success and -1 on failure: errno EIO (no active
+ * subvolume) or ENOTDIR (@path is not a directory), otherwise as left by
+ * glfs_lresolve() or syncop_rmdir().
+ */
+int
+glfs_rmdir (struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ ret = syncop_rmdir (subvol, &loc);
+
+ ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+out:
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Rename @oldpath to @newpath via syncop_rename() and mirror the change
+ * in the local inode table with inode_rename() on success. If @newpath
+ * already exists, both paths must be directories or both non-directories;
+ * otherwise the call fails with EISDIR.
+ *
+ * Returns the syncop_rename() result, or -1 with errno EIO (no active
+ * subvolume) or EISDIR (type mismatch); resolution failures return the
+ * glfs_lresolve() result. An ESTALE from syncop_rename() restarts the
+ * whole sequence up to DEFAULT_REVAL_COUNT times.
+ *
+ * NOTE(review): the check after resolving @newpath bails out only when
+ * errno != ENOENT and newloc.parent is set, so a failure that leaves
+ * newloc.parent NULL appears to fall through to syncop_rename() --
+ * confirm this is intended.
+ */
+int
+glfs_rename (struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+ int reval = 0;
+
+ __glfs_entry_fs (fs);
+
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve (fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve (fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY (ret, errno, reval, &newloc, retrynew);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) !=
+ (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename (subvol, &oldloc, &newloc);
+
+ if (ret == -1 && errno == ESTALE) {
+ if (reval < DEFAULT_REVAL_COUNT) {
+ reval++;
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+ goto retry;
+ }
+ }
+
+ if (ret == 0)
+ inode_rename (oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode,
+ &oldiatt);
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+
+/*
+ * Create a hard link at @newpath to the non-directory at @oldpath via
+ * syncop_link(), then link the shared inode under the new name locally.
+ *
+ * Returns 0 on success and -1 on failure: errno EIO (no active
+ * subvolume), EEXIST (@newpath already resolves), EISDIR (@oldpath is a
+ * directory), otherwise as left by the failing call.
+ *
+ * An ESTALE from syncop_link() restarts the whole sequence, bounded by
+ * DEFAULT_REVAL_COUNT exactly as in glfs_rename(). The previous
+ * "if (reval--)" test never retried when reval was still 0 and, by
+ * decrementing the counter, could undo the bound that ESTALE_RETRY
+ * relies on.
+ */
+int
+glfs_link (struct glfs *fs, const char *oldpath, const char *newpath)
+{
+        int              ret = -1;
+        xlator_t        *subvol = NULL;
+        loc_t            oldloc = {0, };
+        loc_t            newloc = {0, };
+        struct iatt      oldiatt = {0, };
+        struct iatt      newiatt = {0, };
+        int              reval = 0;
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (!subvol) {
+                ret = -1;
+                errno = EIO;
+                goto out;
+        }
+retry:
+        ret = glfs_lresolve (fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+        ESTALE_RETRY (ret, errno, reval, &oldloc, retry);
+
+        if (ret)
+                goto out;
+retrynew:
+        ret = glfs_lresolve (fs, subvol, newpath, &newloc, &newiatt, reval);
+
+        ESTALE_RETRY (ret, errno, reval, &newloc, retrynew);
+
+        if (ret == 0) {
+                ret = -1;
+                errno = EEXIST;
+                goto out;
+        }
+
+        if (oldiatt.ia_type == IA_IFDIR) {
+                ret = -1;
+                errno = EISDIR;
+                goto out;
+        }
+
+        /* Filling the inode of the hard link to be same as that of the
+           original file
+        */
+        if (newloc.inode) {
+                inode_unref (newloc.inode);
+                newloc.inode = NULL;
+        }
+        newloc.inode = inode_ref (oldloc.inode);
+
+        ret = syncop_link (subvol, &oldloc, &newloc);
+
+        if (ret == -1 && errno == ESTALE) {
+                if (reval < DEFAULT_REVAL_COUNT) {
+                        reval++;
+                        loc_wipe (&oldloc);
+                        loc_wipe (&newloc);
+                        goto retry;
+                }
+        }
+
+        if (ret == 0)
+                ret = glfs_loc_link (&newloc, &oldiatt);
+out:
+        loc_wipe (&oldloc);
+        loc_wipe (&newloc);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Open the directory at @path on @fs for reading entries.
+ *
+ * Returns a new glfs_fd on success and NULL on failure: errno EIO (no
+ * active subvolume), ENOTDIR (@path is not a directory), ENOMEM
+ * (fd_create failed), otherwise as left by glfs_resolve() or
+ * syncop_opendir(). ESTALE results are retried through ESTALE_RETRY.
+ *
+ * The success branch is taken only when a glfd exists. The previous bare
+ * "else" also ran when glfd was NULL (no active subvolume, or
+ * glfs_fd_new() failed) and dereferenced it.
+ */
+struct glfs_fd *
+glfs_opendir (struct glfs *fs, const char *path)
+{
+        int              ret = -1;
+        struct glfs_fd  *glfd = NULL;
+        xlator_t        *subvol = NULL;
+        loc_t            loc = {0, };
+        struct iatt      iatt = {0, };
+        int              reval = 0;
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (!subvol) {
+                ret = -1;
+                errno = EIO;
+                goto out;
+        }
+
+        glfd = glfs_fd_new (fs);
+        if (!glfd)
+                goto out;
+
+        INIT_LIST_HEAD (&glfd->entries);
+retry:
+        ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret)
+                goto out;
+
+        if (!IA_ISDIR (iatt.ia_type)) {
+                ret = -1;
+                errno = ENOTDIR;
+                goto out;
+        }
+
+        if (glfd->fd) {
+                /* Retry. Safe to touch glfd->fd as we
+                   still have not glfs_fd_bind() yet.
+                */
+                fd_unref (glfd->fd);
+                glfd->fd = NULL;
+        }
+
+        glfd->fd = fd_create (loc.inode, getpid());
+        if (!glfd->fd) {
+                ret = -1;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        ret = syncop_opendir (subvol, &loc, glfd->fd);
+
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+out:
+        loc_wipe (&loc);
+
+        if (ret && glfd) {
+                glfs_fd_destroy (glfd);
+                glfd = NULL;
+        } else if (glfd) {
+                fd_bind (glfd->fd);
+                glfs_fd_bind (glfd);
+        }
+
+        glfs_subvol_done (fs, subvol);
+
+        return glfd;
+}
+
+
+/*
+ * Release a directory descriptor: frees the cached entry list hanging
+ * off glfd->entries, then destroys @glfd. Always returns 0.
+ */
+int
+glfs_closedir (struct glfs_fd *glfd)
+{
+        gf_dirent_t *cached = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* view the embedded list head as the gf_dirent_t that contains it */
+        cached = list_entry (&glfd->entries, gf_dirent_t, list);
+        gf_dirent_free (cached);
+
+        glfs_fd_destroy (glfd);
+
+        return 0;
+}
+
+/* Return the offset currently stored in the directory descriptor @fd. */
+long
+glfs_telldir (struct glfs_fd *fd)
+{
+ return fd->offset;
+}
+
+/*
+ * Set the position of directory descriptor @fd to @offset. Does nothing
+ * when the stored offset already equals @offset. Otherwise the cached
+ * entries are searched for one whose d_off equals @offset; if it has a
+ * successor in the cache, that successor becomes fd->next. If no such
+ * entry is found, fd->next stays NULL.
+ */
+void
+glfs_seekdir (struct glfs_fd *fd, long offset)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ if (fd->offset == offset)
+ return;
+
+ fd->offset = offset;
+ fd->next = NULL;
+
+ list_for_each_entry_safe (entry, tmp, &fd->entries, list) {
+ if (entry->d_off != offset)
+ continue;
+
+ if (&tmp->list != &fd->entries) { /* matched entry is not the last cached one */
+ /* found! */
+ fd->next = tmp;
+ return;
+ }
+ }
+ /* could not find entry at requested offset in the cache.
+ next readdir_r() will result in glfd_entry_refresh()
+ */
+}
+
+/*
+ * Queue an asynchronous discard of len bytes at offset on glfd.
+ *
+ * A glfs_io request describing the operation is allocated and handed to
+ * synctask_new() together with glfs_io_async_task/glfs_io_async_cbk; fn and
+ * data are stored in the request.
+ *
+ * Returns -1 with errno set to ENOMEM if the request cannot be allocated,
+ * otherwise the return value of synctask_new().  When synctask_new()
+ * returns non-zero the request is freed here.
+ */
+int
+glfs_discard_async (struct glfs_fd *glfd, off_t offset, size_t len,
+                    glfs_io_cbk fn, void *data)
+{
+        struct glfs_io *req = NULL;
+        int             rc = 0;
+
+        req = GF_CALLOC (1, sizeof (*req), glfs_mt_glfs_io_t);
+        if (req == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        req->op = GF_FOP_DISCARD;
+        req->glfd = glfd;
+        req->offset = offset;
+        req->count = len;
+        req->fn = fn;
+        req->data = data;
+
+        rc = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+                           glfs_io_async_task, glfs_io_async_cbk,
+                           NULL, req);
+        if (rc == 0)
+                return rc;
+
+        /* the task was not queued, so the request is still ours to free */
+        GF_FREE (req->iov);
+        GF_FREE (req);
+
+        return rc;
+}
+
+/*
+ * Queue an asynchronous zerofill of len bytes at offset on glfd.
+ *
+ * A glfs_io request describing the operation is allocated and handed to
+ * synctask_new() together with glfs_io_async_task/glfs_io_async_cbk; fn and
+ * data are stored in the request.
+ *
+ * Returns -1 with errno set to ENOMEM if the request cannot be allocated,
+ * otherwise the return value of synctask_new().  When synctask_new()
+ * returns non-zero the request is freed here.
+ */
+int
+glfs_zerofill_async (struct glfs_fd *glfd, off_t offset, size_t len,
+                     glfs_io_cbk fn, void *data)
+{
+        struct glfs_io *req = NULL;
+        int             rc = 0;
+
+        req = GF_CALLOC (1, sizeof (*req), glfs_mt_glfs_io_t);
+        if (req == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        req->op = GF_FOP_ZEROFILL;
+        req->glfd = glfd;
+        req->offset = offset;
+        req->count = len;
+        req->fn = fn;
+        req->data = data;
+
+        rc = synctask_new (glfs_from_glfd (glfd)->ctx->env,
+                           glfs_io_async_task, glfs_io_async_cbk,
+                           NULL, req);
+        if (rc == 0)
+                return rc;
+
+        /* the task was not queued, so the request is still ours to free */
+        GF_FREE (req->iov);
+        GF_FREE (req);
+
+        return rc;
+}
+
+
+/*
+ * Copy the fields of a gf_dirent_t into a POSIX struct dirent.
+ *
+ * d_ino is always copied; d_off, d_type and d_namlen are copied only when
+ * the platform's struct dirent has them (the _DIRENT_HAVE_* macros).  The
+ * name is copied into dirent->d_name using at most GF_NAME_MAX + 1 bytes
+ * and is always NUL-terminated, truncating an over-long name.
+ */
+void
+gf_dirent_to_dirent (gf_dirent_t *gf_dirent, struct dirent *dirent)
+{
+        dirent->d_ino = gf_dirent->d_ino;
+
+#ifdef _DIRENT_HAVE_D_OFF
+        dirent->d_off = gf_dirent->d_off;
+#endif
+
+#ifdef _DIRENT_HAVE_D_TYPE
+        dirent->d_type = gf_dirent->d_type;
+#endif
+
+#ifdef _DIRENT_HAVE_D_NAMLEN
+        dirent->d_namlen = strlen (gf_dirent->d_name);
+#endif
+
+        strncpy (dirent->d_name, gf_dirent->d_name, GF_NAME_MAX + 1);
+        /* strncpy() writes no terminator when the source fills all
+           GF_NAME_MAX + 1 bytes, so terminate explicitly.  The index is
+           the last byte strncpy() itself is allowed to write. */
+        dirent->d_name[GF_NAME_MAX] = '\0';
+}
+
+
+/*
+ * Replace the directory entries cached on glfd with a fresh batch read at
+ * glfd->offset.
+ *
+ * plus selects syncop_readdirp() (non-zero) or syncop_readdir() (zero).
+ * When the read returns >= 0 the new list replaces glfd->entries, the
+ * previous list is freed and errno is cleared; when it returns > 0,
+ * glfd->next is pointed at the first entry of the new list.
+ *
+ * Returns the value of the readdir call, or -1 with errno set to EIO (no
+ * active subvolume), EBADFD (fd could not be resolved) or EBADF (fd is not
+ * a directory).
+ */
+int
+glfd_entry_refresh (struct glfs_fd *glfd, int plus)
+{
+ xlator_t *subvol = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t old;
+ int ret = -1;
+ fd_t *fd = NULL;
+
+ subvol = glfs_active_subvol (glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ if (fd->inode->ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = EBADF;
+ goto out;
+ }
+
+ /* entries and old are only initialised from here on; the error paths
+ above jump to out, which does not touch either list */
+ INIT_LIST_HEAD (&entries.list);
+ INIT_LIST_HEAD (&old.list);
+
+ if (plus)
+ ret = syncop_readdirp (subvol, fd, 131072, glfd->offset,
+ NULL, &entries);
+ else
+ ret = syncop_readdir (subvol, fd, 131072, glfd->offset,
+ &entries);
+ if (ret >= 0) {
+ if (plus)
+ gf_link_inodes_from_dirent (THIS, fd->inode, &entries);
+
+ /* move the current cache into old, then install the new batch */
+ list_splice_init (&glfd->entries, &old.list);
+ list_splice_init (&entries.list, &glfd->entries);
+
+ /* spurious errno is dangerous for glfd_entry_next() */
+ errno = 0;
+ }
+
+ if (ret > 0)
+ glfd->next = list_entry (glfd->entries.next, gf_dirent_t, list);
+
+ /* old is an empty list when the read failed, as nothing was spliced */
+ gf_dirent_free (&old);
+out:
+ if (fd)
+ fd_unref (fd);
+
+ glfs_subvol_done (glfd->fs, subvol);
+
+ return ret;
+}
+
+
+/*
+ * Return the next cached directory entry of glfd and advance the stream.
+ *
+ * The cache is refreshed through glfd_entry_refresh() when the stream is at
+ * offset 0 or has no next entry.  After handing out an entry, glfd->next is
+ * set to its successor (NULL when it was the last cached one) and
+ * glfd->offset to the entry's d_off.
+ *
+ * Returns NULL when the refresh fails or no entry is available.
+ */
+gf_dirent_t *
+glfd_entry_next (struct glfs_fd *glfd, int plus)
+{
+        gf_dirent_t *cur = NULL;
+
+        if (!glfd->offset || !glfd->next) {
+                if (glfd_entry_refresh (glfd, plus) < 0)
+                        return NULL;
+        }
+
+        cur = glfd->next;
+        if (cur == NULL)
+                return NULL;
+
+        /* reaching the list head again means cur was the last cached entry */
+        glfd->next = (&cur->next->list == &glfd->entries) ? NULL : cur->next;
+        glfd->offset = cur->d_off;
+
+        return cur;
+}
+
+
+/*
+ * Return the per-handle scratch dirent buffer, allocating it on first use.
+ *
+ * Runs under glfd->fd->lock.  An existing buffer is zeroed before being
+ * returned; otherwise READDIRBUF_SIZE bytes are allocated and remembered in
+ * glfd->readdirbuf.  Returns NULL with errno set to ENOMEM when the
+ * allocation fails.
+ */
+static struct dirent *
+glfs_readdirbuf_get (struct glfs_fd *glfd)
+{
+        struct dirent *scratch = NULL;
+
+        LOCK (&glfd->fd->lock);
+        {
+                scratch = glfd->readdirbuf;
+                if (scratch != NULL) {
+                        memset (scratch, 0, READDIRBUF_SIZE);
+                } else {
+                        scratch = GF_CALLOC (1, READDIRBUF_SIZE,
+                                             glfs_mt_readdirbuf_t);
+                        if (scratch != NULL)
+                                glfd->readdirbuf = scratch;
+                        else
+                                errno = ENOMEM;
+                }
+        }
+        UNLOCK (&glfd->fd->lock);
+
+        return scratch;
+}
+
+
+/*
+ * Read the next directory entry of glfd, optionally with its attributes.
+ *
+ * The entry is written to ext when the caller supplies it, otherwise to the
+ * handle's own scratch buffer.  *res (when res is non-NULL) is set to that
+ * buffer if an entry was read and to NULL otherwise.  When stat is non-NULL
+ * the entry's attributes are converted into it as well.
+ *
+ * errno is cleared on entry.  Returns -1 if errno is non-zero after
+ * fetching the entry, or if no buffer is available (errno ENOMEM);
+ * otherwise 0.
+ */
+int
+glfs_readdirplus_r (struct glfs_fd *glfd, struct stat *stat, struct dirent *ext,
+                    struct dirent **res)
+{
+        int            status = 0;
+        gf_dirent_t   *next = NULL;
+        struct dirent *out = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        errno = 0;
+
+        out = (ext != NULL) ? ext : glfs_readdirbuf_get (glfd);
+        if (out == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        next = glfd_entry_next (glfd, stat != NULL);
+        /* errno was cleared above, so a non-zero value comes from the fetch */
+        status = errno ? -1 : 0;
+
+        if (res != NULL)
+                *res = (next != NULL) ? out : NULL;
+
+        if (next == NULL)
+                return status;
+
+        gf_dirent_to_dirent (next, out);
+        if (stat != NULL)
+                glfs_iatt_to_stat (glfd->fs, &next->d_stat, stat);
+
+        return status;
+}
+
+
+/*
+ * Read the next directory entry of glfd into buf without attributes; a thin
+ * wrapper around glfs_readdirplus_r() with no stat buffer.
+ */
+int
+glfs_readdir_r (struct glfs_fd *glfd, struct dirent *buf, struct dirent **res)
+{
+        int status = glfs_readdirplus_r (glfd, NULL, buf, res);
+
+        return status;
+}
+
+
+/*
+ * Read the next directory entry of glfd into the handle's own buffer and
+ * fill stat when it is non-NULL.  Returns the entry, or NULL when
+ * glfs_readdirplus_r() reports an error or yields no entry.
+ */
+struct dirent *
+glfs_readdirplus (struct glfs_fd *glfd, struct stat *stat)
+{
+        struct dirent *res = NULL;
+
+        if (glfs_readdirplus_r (glfd, stat, NULL, &res) != 0)
+                return NULL;
+
+        return res;
+}
+
+
+
+/*
+ * Read the next directory entry of glfd without attributes; a thin wrapper
+ * around glfs_readdirplus() with no stat buffer.
+ */
+struct dirent *
+glfs_readdir (struct glfs_fd *glfd)
+{
+        struct dirent *entry = glfs_readdirplus (glfd, NULL);
+
+        return entry;
+}
+
+
+/*
+ * Resolve path on fs and call syncop_statfs() on it, storing the result in
+ * buf.  Both steps are followed by ESTALE_RETRY with the "retry" label.
+ *
+ * Returns the result of the last step executed; -1 with errno set to EIO
+ * when there is no active subvolume.
+ */
+int
+glfs_statvfs (struct glfs *fs, const char *path, struct statvfs *buf)
+{
+        int          ret = -1;
+        int          reval = 0;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                /* ret still holds its initial -1 */
+                errno = EIO;
+                goto out;
+        }
+retry:
+        ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret == 0) {
+                ret = syncop_statfs (subvol, &loc, buf);
+                ESTALE_RETRY (ret, errno, reval, &loc, retry);
+        }
+out:
+        loc_wipe (&loc);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Resolve path on fs and apply the attributes in iatt selected by the valid
+ * mask through syncop_setattr().
+ *
+ * follow selects the resolver: glfs_resolve() when non-zero, glfs_lresolve()
+ * when zero.  Both steps are followed by ESTALE_RETRY with the "retry"
+ * label.
+ *
+ * Returns the result of the last step executed; -1 with errno set to EIO
+ * when there is no active subvolume.
+ */
+int
+glfs_setattr (struct glfs *fs, const char *path, struct iatt *iatt,
+              int valid, int follow)
+{
+        int          ret = -1;
+        int          reval = 0;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  riatt = {0, };
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                /* ret still holds its initial -1 */
+                errno = EIO;
+                goto out;
+        }
+retry:
+        ret = follow ? glfs_resolve (fs, subvol, path, &loc, &riatt, reval)
+                     : glfs_lresolve (fs, subvol, path, &loc, &riatt, reval);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret == 0) {
+                ret = syncop_setattr (subvol, &loc, iatt, valid, 0, 0);
+                ESTALE_RETRY (ret, errno, reval, &loc, retry);
+        }
+out:
+        loc_wipe (&loc);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Apply the attributes in iatt selected by the valid mask to the open
+ * handle glfd through syncop_fsetattr().
+ *
+ * Returns the syncop result, or -1 with errno set to EIO (no active
+ * subvolume) or EBADFD (the fd could not be resolved).
+ */
+int
+glfs_fsetattr (struct glfs_fd *glfd, struct iatt *iatt, int valid)
+{
+        int       ret = -1;
+        xlator_t *subvol = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on both early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        ret = syncop_fsetattr (subvol, fd, iatt, valid, 0, 0);
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Change the permission bits of path to mode.  Builds an iatt carrying only
+ * the protection bits and passes it to glfs_setattr() with
+ * GF_SET_ATTR_MODE and follow = 1.
+ */
+int
+glfs_chmod (struct glfs *fs, const char *path, mode_t mode)
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_prot = ia_prot_from_st_mode (mode);
+
+        return glfs_setattr (fs, path, &attrs, GF_SET_ATTR_MODE, 1);
+}
+
+
+/*
+ * Change the permission bits of the open handle glfd to mode.  Builds an
+ * iatt carrying only the protection bits and passes it to glfs_fsetattr()
+ * with GF_SET_ATTR_MODE.
+ */
+int
+glfs_fchmod (struct glfs_fd *glfd, mode_t mode)
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_prot = ia_prot_from_st_mode (mode);
+
+        return glfs_fsetattr (glfd, &attrs, GF_SET_ATTR_MODE);
+}
+
+
+/*
+ * Change owner and group of path to uid/gid.  Passes both ids to
+ * glfs_setattr() with the UID and GID bits set and follow = 1.
+ */
+int
+glfs_chown (struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_uid = uid;
+        attrs.ia_gid = gid;
+
+        return glfs_setattr (fs, path, &attrs,
+                             GF_SET_ATTR_UID|GF_SET_ATTR_GID, 1);
+}
+
+
+/*
+ * Change owner and group of path to uid/gid.  Same as glfs_chown() except
+ * that glfs_setattr() is called with follow = 0.
+ */
+int
+glfs_lchown (struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_uid = uid;
+        attrs.ia_gid = gid;
+
+        return glfs_setattr (fs, path, &attrs,
+                             GF_SET_ATTR_UID|GF_SET_ATTR_GID, 0);
+}
+
+
+/*
+ * Change owner and group of the open handle glfd to uid/gid.  Passes both
+ * ids to glfs_fsetattr() with the UID and GID bits set.
+ */
+int
+glfs_fchown (struct glfs_fd *glfd, uid_t uid, gid_t gid)
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_uid = uid;
+        attrs.ia_gid = gid;
+
+        return glfs_fsetattr (glfd, &attrs, GF_SET_ATTR_UID|GF_SET_ATTR_GID);
+}
+
+
+/*
+ * Set access and modification time of path.  times[0] is the access time
+ * and times[1] the modification time; both are passed to glfs_setattr()
+ * with the ATIME and MTIME bits set and follow = 1.
+ */
+int
+glfs_utimens (struct glfs *fs, const char *path, struct timespec times[2])
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_atime = times[0].tv_sec;
+        attrs.ia_atime_nsec = times[0].tv_nsec;
+
+        attrs.ia_mtime = times[1].tv_sec;
+        attrs.ia_mtime_nsec = times[1].tv_nsec;
+
+        return glfs_setattr (fs, path, &attrs,
+                             GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME, 1);
+}
+
+
+/*
+ * Set access and modification time of path.  Same as glfs_utimens() except
+ * that glfs_setattr() is called with follow = 0.
+ */
+int
+glfs_lutimens (struct glfs *fs, const char *path, struct timespec times[2])
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_atime = times[0].tv_sec;
+        attrs.ia_atime_nsec = times[0].tv_nsec;
+
+        attrs.ia_mtime = times[1].tv_sec;
+        attrs.ia_mtime_nsec = times[1].tv_nsec;
+
+        return glfs_setattr (fs, path, &attrs,
+                             GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME, 0);
+}
+
+
+/*
+ * Set access and modification time of the open handle glfd.  times[0] is
+ * the access time and times[1] the modification time; both are passed to
+ * glfs_fsetattr() with the ATIME and MTIME bits set.
+ */
+int
+glfs_futimens (struct glfs_fd *glfd, struct timespec times[2])
+{
+        struct iatt attrs = {0, };
+
+        attrs.ia_atime = times[0].tv_sec;
+        attrs.ia_atime_nsec = times[0].tv_nsec;
+
+        attrs.ia_mtime = times[1].tv_sec;
+        attrs.ia_mtime_nsec = times[1].tv_nsec;
+
+        return glfs_fsetattr (glfd, &attrs,
+                              GF_SET_ATTR_ATIME|GF_SET_ATTR_MTIME);
+}
+
+
+/*
+ * Extract the value stored under name in xattr and copy it to value.
+ *
+ * Returns the length of the stored value.  When value is NULL or size is 0
+ * nothing is copied and only the length is reported.  Returns -1 with errno
+ * set to ENODATA when name is not in the dict and to ERANGE when size is
+ * smaller than the stored value.
+ *
+ * The reference on xattr is dropped before returning, on every path.
+ */
+int
+glfs_getxattr_process (void *value, size_t size, dict_t *xattr,
+                       const char *name)
+{
+        data_t *data = NULL;
+        int     ret = -1;
+
+        data = dict_get (xattr, (char *)name);
+        if (data == NULL) {
+                errno = ENODATA;
+                ret = -1;
+        } else {
+                ret = data->len;
+
+                if (value != NULL && size != 0) {
+                        if (size < ret) {
+                                ret = -1;
+                                errno = ERANGE;
+                        } else {
+                                memcpy (value, data->data, ret);
+                        }
+                }
+        }
+
+        if (xattr)
+                dict_unref (xattr);
+
+        return ret;
+}
+
+
+/*
+ * Shared implementation of glfs_getxattr() and glfs_lgetxattr().
+ *
+ * Resolves path (glfs_resolve() when follow is non-zero, glfs_lresolve()
+ * otherwise), fetches the named extended attribute with syncop_getxattr()
+ * and hands the resulting dict to glfs_getxattr_process(), which copies the
+ * value into value/size.  The resolve and getxattr steps are followed by
+ * ESTALE_RETRY with the "retry" label.
+ *
+ * Returns the result of the last step executed; -1 with errno set to EIO
+ * when there is no active subvolume.
+ */
+ssize_t
+glfs_getxattr_common (struct glfs *fs, const char *path, const char *name,
+                      void *value, size_t size, int follow)
+{
+        int          ret = -1;
+        int          reval = 0;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+        dict_t      *xattr = NULL;
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                /* ret still holds its initial -1 */
+                errno = EIO;
+                goto out;
+        }
+retry:
+        ret = follow ? glfs_resolve (fs, subvol, path, &loc, &iatt, reval)
+                     : glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret)
+                goto out;
+
+        ret = syncop_getxattr (subvol, &loc, &xattr, name);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret == 0)
+                ret = glfs_getxattr_process (value, size, xattr, name);
+out:
+        loc_wipe (&loc);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Read extended attribute name of path into value; calls
+ * glfs_getxattr_common() with follow = 1.
+ */
+ssize_t
+glfs_getxattr (struct glfs *fs, const char *path, const char *name,
+               void *value, size_t size)
+{
+        const int follow = 1;
+
+        return glfs_getxattr_common (fs, path, name, value, size, follow);
+}
+
+
+/*
+ * Read extended attribute name of path into value; calls
+ * glfs_getxattr_common() with follow = 0.
+ */
+ssize_t
+glfs_lgetxattr (struct glfs *fs, const char *path, const char *name,
+                void *value, size_t size)
+{
+        const int follow = 0;
+
+        return glfs_getxattr_common (fs, path, name, value, size, follow);
+}
+
+
+/*
+ * Read extended attribute name of the open handle glfd into value.
+ *
+ * Fetches the attribute with syncop_fgetxattr() and passes the resulting
+ * dict to glfs_getxattr_process().  Returns the result of the last step
+ * executed, or -1 with errno set to EIO (no active subvolume) or EBADFD
+ * (the fd could not be resolved).
+ */
+ssize_t
+glfs_fgetxattr (struct glfs_fd *glfd, const char *name, void *value,
+                size_t size)
+{
+        int       ret = -1;
+        xlator_t *subvol = NULL;
+        dict_t   *xattr = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on both early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        ret = syncop_fgetxattr (subvol, fd, &xattr, name);
+        if (ret == 0)
+                ret = glfs_getxattr_process (value, size, xattr, name);
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Write the joined key names of xattr into value.
+ *
+ * The required length is obtained from dict_keys_join() with a NULL buffer
+ * and is the return value.  When value is NULL or size is 0 only that
+ * length is reported.  Returns -1 with errno set to ERANGE when size is
+ * smaller than the required length.
+ *
+ * The reference on xattr is dropped before returning, on every path.
+ */
+int
+glfs_listxattr_process (void *value, size_t size, dict_t *xattr)
+{
+        int ret = -1;
+
+        ret = dict_keys_join (NULL, 0, xattr, NULL);
+
+        if (value != NULL && size != 0) {
+                if (size < ret) {
+                        ret = -1;
+                        errno = ERANGE;
+                } else {
+                        dict_keys_join (value, size, xattr, NULL);
+                }
+        }
+
+        if (xattr)
+                dict_unref (xattr);
+
+        return ret;
+}
+
+
+/*
+ * Shared implementation of glfs_listxattr() and glfs_llistxattr().
+ *
+ * Resolves path (glfs_resolve() when follow is non-zero, glfs_lresolve()
+ * otherwise), calls syncop_getxattr() with a NULL name and hands the
+ * resulting dict to glfs_listxattr_process(), which writes the key names
+ * into value/size.  The resolve and getxattr steps are followed by
+ * ESTALE_RETRY with the "retry" label.
+ *
+ * Returns the result of the last step executed; -1 with errno set to EIO
+ * when there is no active subvolume.
+ */
+ssize_t
+glfs_listxattr_common (struct glfs *fs, const char *path, void *value,
+                       size_t size, int follow)
+{
+        int          ret = -1;
+        int          reval = 0;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+        dict_t      *xattr = NULL;
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                /* ret still holds its initial -1 */
+                errno = EIO;
+                goto out;
+        }
+
+retry:
+        ret = follow ? glfs_resolve (fs, subvol, path, &loc, &iatt, reval)
+                     : glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret)
+                goto out;
+
+        ret = syncop_getxattr (subvol, &loc, &xattr, NULL);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret == 0)
+                ret = glfs_listxattr_process (value, size, xattr);
+out:
+        loc_wipe (&loc);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * List the extended attribute names of path into value; calls
+ * glfs_listxattr_common() with follow = 1.
+ */
+ssize_t
+glfs_listxattr (struct glfs *fs, const char *path, void *value, size_t size)
+{
+        const int follow = 1;
+
+        return glfs_listxattr_common (fs, path, value, size, follow);
+}
+
+
+/*
+ * List the extended attribute names of path into value; calls
+ * glfs_listxattr_common() with follow = 0.
+ */
+ssize_t
+glfs_llistxattr (struct glfs *fs, const char *path, void *value, size_t size)
+{
+        const int follow = 0;
+
+        return glfs_listxattr_common (fs, path, value, size, follow);
+}
+
+
+/*
+ * List the extended attribute names of the open handle glfd into value.
+ *
+ * Calls syncop_fgetxattr() with a NULL name and passes the resulting dict
+ * to glfs_listxattr_process().  Returns the result of the last step
+ * executed, or -1 with errno set to EIO (no active subvolume) or EBADFD
+ * (the fd could not be resolved).
+ */
+ssize_t
+glfs_flistxattr (struct glfs_fd *glfd, void *value, size_t size)
+{
+        int       ret = -1;
+        xlator_t *subvol = NULL;
+        dict_t   *xattr = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on both early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        ret = syncop_fgetxattr (subvol, fd, &xattr, NULL);
+        if (ret == 0)
+                ret = glfs_listxattr_process (value, size, xattr);
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Build a new dict holding a single entry name -> value (size bytes), set
+ * with dict_set_static_bin().  Returns the dict, or NULL when the dict
+ * cannot be created or the entry cannot be set (the dict is destroyed in
+ * the latter case).
+ */
+dict_t *
+dict_for_key_value (const char *name, const char *value, size_t size)
+{
+        dict_t *xattr = dict_new ();
+
+        if (xattr == NULL)
+                return NULL;
+
+        if (dict_set_static_bin (xattr, (char *)name, (void *)value, size)) {
+                dict_destroy (xattr);
+                return NULL;
+        }
+
+        return xattr;
+}
+
+
+/*
+ * Shared implementation of glfs_setxattr() and glfs_lsetxattr().
+ *
+ * Resolves path (glfs_resolve() when follow is non-zero, glfs_lresolve()
+ * otherwise), wraps name/value/size in a dict and passes it to
+ * syncop_setxattr() with flags.  The resolve and setxattr steps are
+ * followed by ESTALE_RETRY with the "retry" label.
+ *
+ * Returns the result of the last step executed; -1 with errno set to EIO
+ * (no active subvolume) or ENOMEM (the dict could not be built).
+ */
+int
+glfs_setxattr_common (struct glfs *fs, const char *path, const char *name,
+                      const void *value, size_t size, int flags, int follow)
+{
+        int          ret = -1;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+        dict_t      *xattr = NULL;
+        int          reval = 0;
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (!subvol) {
+                ret = -1;
+                errno = EIO;
+                goto out;
+        }
+retry:
+        if (follow)
+                ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+        else
+                ret = glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret)
+                goto out;
+
+        /* The dict depends only on the arguments, so build it once.  A
+           retry triggered after syncop_setxattr() below comes back through
+           here with xattr already set; creating a second dict would
+           overwrite the pointer and leak the first one. */
+        if (!xattr) {
+                xattr = dict_for_key_value (name, value, size);
+                if (!xattr) {
+                        ret = -1;
+                        errno = ENOMEM;
+                        goto out;
+                }
+        }
+
+        ret = syncop_setxattr (subvol, &loc, xattr, flags);
+
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+out:
+        loc_wipe (&loc);
+        if (xattr)
+                dict_unref (xattr);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Set extended attribute name of path to value; calls
+ * glfs_setxattr_common() with follow = 1.
+ */
+int
+glfs_setxattr (struct glfs *fs, const char *path, const char *name,
+               const void *value, size_t size, int flags)
+{
+        const int follow = 1;
+
+        return glfs_setxattr_common (fs, path, name, value, size, flags,
+                                     follow);
+}
+
+
+/*
+ * Set extended attribute name of path to value; calls
+ * glfs_setxattr_common() with follow = 0.
+ */
+int
+glfs_lsetxattr (struct glfs *fs, const char *path, const char *name,
+                const void *value, size_t size, int flags)
+{
+        const int follow = 0;
+
+        return glfs_setxattr_common (fs, path, name, value, size, flags,
+                                     follow);
+}
+
+
+/*
+ * Set extended attribute name of the open handle glfd to value.
+ *
+ * Wraps name/value/size in a dict and passes it to syncop_fsetxattr() with
+ * flags.  Returns the syncop result, or -1 with errno set to EIO (no active
+ * subvolume), EBADFD (the fd could not be resolved) or ENOMEM (the dict
+ * could not be built).
+ */
+int
+glfs_fsetxattr (struct glfs_fd *glfd, const char *name, const void *value,
+                size_t size, int flags)
+{
+        int       ret = -1;
+        xlator_t *subvol = NULL;
+        dict_t   *xattr = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on all three early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        xattr = dict_for_key_value (name, value, size);
+        if (xattr == NULL) {
+                errno = ENOMEM;
+                goto out;
+        }
+
+        ret = syncop_fsetxattr (subvol, fd, xattr, flags);
+out:
+        if (xattr != NULL)
+                dict_unref (xattr);
+
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Shared implementation of glfs_removexattr() and glfs_lremovexattr().
+ *
+ * Resolves path (glfs_resolve() when follow is non-zero, glfs_lresolve()
+ * otherwise) and calls syncop_removexattr() for name.  Both steps are
+ * followed by ESTALE_RETRY with the "retry" label.
+ *
+ * Returns the result of the last step executed; -1 with errno set to EIO
+ * when there is no active subvolume.
+ */
+int
+glfs_removexattr_common (struct glfs *fs, const char *path, const char *name,
+                         int follow)
+{
+        int          ret = -1;
+        int          reval = 0;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                /* ret still holds its initial -1 */
+                errno = EIO;
+                goto out;
+        }
+retry:
+        ret = follow ? glfs_resolve (fs, subvol, path, &loc, &iatt, reval)
+                     : glfs_lresolve (fs, subvol, path, &loc, &iatt, reval);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret == 0) {
+                ret = syncop_removexattr (subvol, &loc, name);
+                ESTALE_RETRY (ret, errno, reval, &loc, retry);
+        }
+
+out:
+        loc_wipe (&loc);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Remove extended attribute name from path; calls
+ * glfs_removexattr_common() with follow = 1.
+ */
+int
+glfs_removexattr (struct glfs *fs, const char *path, const char *name)
+{
+        const int follow = 1;
+
+        return glfs_removexattr_common (fs, path, name, follow);
+}
+
+
+/*
+ * Remove extended attribute name from path; calls
+ * glfs_removexattr_common() with follow = 0.
+ */
+int
+glfs_lremovexattr (struct glfs *fs, const char *path, const char *name)
+{
+        const int follow = 0;
+
+        return glfs_removexattr_common (fs, path, name, follow);
+}
+
+
+/*
+ * Remove extended attribute name from the open handle glfd through
+ * syncop_fremovexattr().
+ *
+ * Returns the syncop result, or -1 with errno set to EIO (no active
+ * subvolume) or EBADFD (the fd could not be resolved).
+ */
+int
+glfs_fremovexattr (struct glfs_fd *glfd, const char *name)
+{
+        int       ret = -1;
+        xlator_t *subvol = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on both early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        ret = syncop_fremovexattr (subvol, fd, name);
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Call syncop_fallocate() on the open handle glfd with keep_size, offset
+ * and len.
+ *
+ * Returns the syncop result, or -1 with errno set to EIO (no active
+ * subvolume) or EBADFD (the fd could not be resolved).
+ */
+int
+glfs_fallocate (struct glfs_fd *glfd, int keep_size, off_t offset, size_t len)
+{
+        int       ret = -1;
+        xlator_t *subvol = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on both early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        ret = syncop_fallocate (subvol, fd, keep_size, offset, len);
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Call syncop_discard() on the open handle glfd for len bytes at offset.
+ *
+ * Returns the syncop result, or -1 with errno set to EIO (no active
+ * subvolume) or EBADFD (the fd could not be resolved).
+ */
+int
+glfs_discard (struct glfs_fd *glfd, off_t offset, size_t len)
+{
+        int       ret = -1;
+        xlator_t *subvol = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on both early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        ret = syncop_discard (subvol, fd, offset, len);
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+/*
+ * Call syncop_zerofill() on the open handle glfd for len bytes at offset.
+ *
+ * Returns the syncop result, or -1 with errno set to EIO (no active
+ * subvolume) or EBADFD (the fd could not be resolved).
+ */
+int
+glfs_zerofill (struct glfs_fd *glfd, off_t offset, size_t len)
+{
+        fd_t     *fd = NULL;
+        xlator_t *subvol = NULL;
+        int       ret = -1;
+
+        __glfs_entry_fd (glfd);
+
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        ret = syncop_zerofill (subvol, fd, offset, len);
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+/*
+ * Make path the current working directory of fs.
+ *
+ * The path is resolved (followed by ESTALE_RETRY with the "retry" label)
+ * and, when it is a directory, its inode is recorded with glfs_cwd_set().
+ *
+ * Returns the resolve result on success or resolve failure; -1 with errno
+ * set to EIO (no active subvolume) or ENOTDIR (path is not a directory).
+ */
+int
+glfs_chdir (struct glfs *fs, const char *path)
+{
+        int          ret = -1;
+        int          reval = 0;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+
+        __glfs_entry_fs (fs);
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                /* ret still holds its initial -1 */
+                errno = EIO;
+                goto out;
+        }
+retry:
+        ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret)
+                goto out;
+
+        if (IA_ISDIR (iatt.ia_type)) {
+                glfs_cwd_set (fs, loc.inode);
+        } else {
+                ret = -1;
+                errno = ENOTDIR;
+        }
+
+out:
+        loc_wipe (&loc);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Make the directory behind the open handle glfd the current working
+ * directory of its fs.
+ *
+ * Returns 0 on success, or -1 with errno set to EIO (no active subvolume),
+ * EBADFD (the fd could not be resolved) or ENOTDIR (the fd's inode is not a
+ * directory).
+ */
+int
+glfs_fchdir (struct glfs_fd *glfd)
+{
+        int       ret = -1;
+        inode_t  *inode = NULL;
+        xlator_t *subvol = NULL;
+        fd_t     *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on every failure path below */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        inode = fd->inode;
+
+        if (IA_ISDIR (inode->ia_type)) {
+                glfs_cwd_set (glfd->fs, inode);
+                ret = 0;
+        } else {
+                errno = ENOTDIR;
+        }
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Resolve path on fs and return the path recorded in the resolved loc.
+ *
+ * The result is written to resolved_path when the caller supplies it;
+ * otherwise a buffer of PATH_MAX + 1 bytes is obtained with malloc() and
+ * returned.  At most PATH_MAX bytes are copied and the result is always
+ * terminated at index PATH_MAX, so a caller-supplied buffer must hold
+ * PATH_MAX + 1 bytes.
+ *
+ * Returns the buffer on success.  Returns NULL when the result is -1,
+ * freeing a buffer allocated here; errno is set to ENOMEM (allocation
+ * failed) or EIO (no active subvolume) by this function.
+ */
+char *
+glfs_realpath (struct glfs *fs, const char *path, char *resolved_path)
+{
+        int          ret = -1;
+        int          reval = 0;
+        char        *retpath = NULL;
+        char        *allocpath = NULL;
+        xlator_t    *subvol = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+
+        __glfs_entry_fs (fs);
+
+        if (resolved_path == NULL) {
+                allocpath = malloc (PATH_MAX + 1);
+                retpath = allocpath;
+        } else {
+                retpath = resolved_path;
+        }
+
+        /* ret keeps its initial -1 on both early failure paths */
+        if (retpath == NULL) {
+                errno = ENOMEM;
+                goto out;
+        }
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+retry:
+        ret = glfs_resolve (fs, subvol, path, &loc, &iatt, reval);
+        ESTALE_RETRY (ret, errno, reval, &loc, retry);
+
+        if (ret == 0 && loc.path != NULL) {
+                strncpy (retpath, loc.path, PATH_MAX);
+                retpath[PATH_MAX] = 0;
+        }
+
+out:
+        loc_wipe (&loc);
+
+        if (ret == -1) {
+                /* allocpath is NULL when the caller supplied the buffer */
+                free (allocpath);
+                retpath = NULL;
+        }
+
+        glfs_subvol_done (fs, subvol);
+
+        return retpath;
+}
+
+
+/*
+ * Write the current working directory of fs into buf (n bytes).
+ *
+ * When no working directory has been recorded the result is "/".
+ * Otherwise the path is obtained with inode_path().
+ *
+ * Returns buf on success.  Returns NULL with errno set to EINVAL when buf
+ * is NULL or n < 2, and to ERANGE when n is not larger than the value
+ * returned by inode_path().  A negative inode_path() result is reported as
+ * ERANGE too.
+ */
+char *
+glfs_getcwd (struct glfs *fs, char *buf, size_t n)
+{
+        int      ret = -1;
+        inode_t *inode = NULL;
+        char    *path = NULL;
+
+        __glfs_entry_fs (fs);
+
+        if (buf == NULL || n < 2) {
+                /* ret still holds its initial -1 */
+                errno = EINVAL;
+                goto out;
+        }
+
+        inode = glfs_cwd_get (fs);
+        if (inode == NULL) {
+                strncpy (buf, "/", n);
+                ret = 0;
+                goto out;
+        }
+
+        ret = inode_path (inode, 0, &path);
+        /* Comparing n with a negative ret converts ret to a huge unsigned
+           value, so a negative result has always taken this branch; the
+           explicit test spells that out. */
+        if (ret < 0 || n <= (size_t) ret) {
+                ret = -1;
+                errno = ERANGE;
+                goto out;
+        }
+
+        strncpy (buf, path, n);
+        ret = 0;
+out:
+        GF_FREE (path);
+
+        if (inode != NULL)
+                inode_unref (inode);
+
+        return (ret < 0) ? NULL : buf;
+}
+
+
+/*
+ * Copy the lock description held in gf_flock into the POSIX struct flock:
+ * type, whence, start, len and pid.
+ */
+static void
+gf_flock_to_flock (struct gf_flock *gf_flock, struct flock *flock)
+{
+        /* what kind of lock */
+        flock->l_type = gf_flock->l_type;
+        flock->l_pid = gf_flock->l_pid;
+
+        /* which byte range */
+        flock->l_whence = gf_flock->l_whence;
+        flock->l_start = gf_flock->l_start;
+        flock->l_len = gf_flock->l_len;
+}
+
+
+/*
+ * Copy the lock description held in the POSIX struct flock into gf_flock:
+ * type, whence, start, len and pid.
+ */
+static void
+gf_flock_from_flock (struct gf_flock *gf_flock, struct flock *flock)
+{
+        /* what kind of lock */
+        gf_flock->l_type = flock->l_type;
+        gf_flock->l_pid = flock->l_pid;
+
+        /* which byte range */
+        gf_flock->l_whence = flock->l_whence;
+        gf_flock->l_start = flock->l_start;
+        gf_flock->l_len = flock->l_len;
+}
+
+
+/*
+ * Issue a POSIX lock command on the open handle glfd.
+ *
+ * flock is converted to a gf_flock, passed to syncop_lk() with cmd, and the
+ * gf_flock is converted back into flock after the call.  When the call
+ * succeeds and cmd is F_SETLK or F_SETLKW, a copy of the request taken
+ * before the call is recorded on the fd with fd_lk_insert_and_merge().
+ *
+ * Returns the syncop result, or -1 with errno set to EIO (no active
+ * subvolume) or EBADFD (the fd could not be resolved).
+ */
+int
+glfs_posix_lock (struct glfs_fd *glfd, int cmd, struct flock *flock)
+{
+        int             ret = -1;
+        xlator_t       *subvol = NULL;
+        struct gf_flock gf_flock = {0, };
+        struct gf_flock saved_flock = {0, };
+        fd_t           *fd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        /* ret keeps its initial -1 on both early failure paths */
+        subvol = glfs_active_subvol (glfd->fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (glfd->fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        /* saved_flock keeps the request as the caller passed it; gf_flock
+           is handed to syncop_lk() and copied back to flock afterwards */
+        gf_flock_from_flock (&saved_flock, flock);
+        gf_flock_from_flock (&gf_flock, flock);
+
+        ret = syncop_lk (subvol, fd, cmd, &gf_flock);
+        gf_flock_to_flock (&gf_flock, flock);
+
+        if (ret == 0) {
+                if (cmd == F_SETLK || cmd == F_SETLKW)
+                        fd_lk_insert_and_merge (fd, cmd, &saved_flock);
+        }
+out:
+        if (fd != NULL)
+                fd_unref (fd);
+
+        glfs_subvol_done (glfd->fs, subvol);
+
+        return ret;
+}
+
+
+/*
+ * Duplicate the open handle glfd.
+ *
+ * A new glfs_fd is created on the same fs, given its own reference on the
+ * resolved fd and bound with glfs_fd_bind().
+ *
+ * Returns the new handle, or NULL with errno set to EIO (no active
+ * subvolume), EBADFD (the fd could not be resolved) or ENOMEM (the new
+ * handle could not be created).
+ */
+struct glfs_fd *
+glfs_dup (struct glfs_fd *glfd)
+{
+        struct glfs *fs = NULL;
+        xlator_t    *subvol = NULL;
+        fd_t        *fd = NULL;
+        glfs_fd_t   *dupfd = NULL;
+
+        __glfs_entry_fd (glfd);
+
+        fs = glfd->fs;
+
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        fd = glfs_resolve_fd (fs, subvol, glfd);
+        if (fd == NULL) {
+                errno = EBADFD;
+                goto out;
+        }
+
+        dupfd = glfs_fd_new (fs);
+        if (dupfd != NULL)
+                dupfd->fd = fd_ref (fd);
+        else
+                errno = ENOMEM;
+out:
+        /* drop the reference taken by glfs_resolve_fd(); dupfd holds its
+           own from fd_ref() */
+        if (fd != NULL)
+                fd_unref (fd);
+        if (dupfd != NULL)
+                glfs_fd_bind (dupfd);
+
+        glfs_subvol_done (fs, subvol);
+
+        return dupfd;
+}
diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
new file mode 100644
index 000000000..9c707a619
--- /dev/null
+++ b/api/src/glfs-handleops.c
@@ -0,0 +1,1278 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "syncop.h"
+#include "glfs.h"
+#include "glfs-handles.h"
+
+/*
+ * Translate the fields of stat selected by the GFAPI_SET_ATTR_* bits in
+ * valid into iatt, and report the matching GF_SET_ATTR_* bits in *glvalid.
+ *
+ * Handles mode, uid, gid, atime and mtime.  When any of stat, iatt or
+ * glvalid is NULL, errno is set to EINVAL and nothing is written.
+ */
+static void
+glfs_iatt_from_stat (struct stat *stat, int valid, struct iatt *iatt,
+                     int *glvalid)
+{
+        int mask = 0;
+
+        /* validate in args */
+        if (stat == NULL || iatt == NULL || glvalid == NULL) {
+                errno = EINVAL;
+                return;
+        }
+
+        if (valid & GFAPI_SET_ATTR_MODE) {
+                iatt->ia_prot = ia_prot_from_st_mode (stat->st_mode);
+                mask |= GF_SET_ATTR_MODE;
+        }
+
+        if (valid & GFAPI_SET_ATTR_UID) {
+                iatt->ia_uid = stat->st_uid;
+                mask |= GF_SET_ATTR_UID;
+        }
+
+        if (valid & GFAPI_SET_ATTR_GID) {
+                iatt->ia_gid = stat->st_gid;
+                mask |= GF_SET_ATTR_GID;
+        }
+
+        if (valid & GFAPI_SET_ATTR_ATIME) {
+                iatt->ia_atime = stat->st_atime;
+                iatt->ia_atime_nsec = ST_ATIM_NSEC (stat);
+                mask |= GF_SET_ATTR_ATIME;
+        }
+
+        if (valid & GFAPI_SET_ATTR_MTIME) {
+                iatt->ia_mtime = stat->st_mtime;
+                iatt->ia_mtime_nsec = ST_MTIM_NSEC (stat);
+                mask |= GF_SET_ATTR_MTIME;
+        }
+
+        *glvalid = mask;
+}
+
+/*
+ * Look up path relative to parent on fs and return an object handle for it.
+ *
+ * parent may be NULL, in which case glfs_resolve_at() is called with a NULL
+ * inode.  When stat is non-NULL it receives the attributes of the resolved
+ * entry.
+ *
+ * Returns the handle produced by glfs_create_object(), or NULL on failure.
+ * errno is set here to EINVAL (fs or path is NULL), EIO (no active
+ * subvolume) or ESTALE (parent could not be resolved).
+ */
+struct glfs_object *
+glfs_h_lookupat (struct glfs *fs, struct glfs_object *parent,
+                 const char *path, struct stat *stat)
+{
+        int                 ret = 0;
+        xlator_t           *subvol = NULL;
+        inode_t            *inode = NULL;
+        struct iatt         iatt = {0, };
+        struct glfs_object *object = NULL;
+        loc_t               loc = {0, };
+
+        /* validate in args */
+        if (fs == NULL || path == NULL) {
+                errno = EINVAL;
+                return NULL;
+        }
+
+        __glfs_entry_fs (fs);
+
+        /* get the active volume */
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        /* get/refresh the in arg objects inode in correlation to the xlator */
+        if (parent != NULL) {
+                inode = glfs_resolve_inode (fs, subvol, parent);
+                if (inode == NULL) {
+                        errno = ESTALE;
+                        goto out;
+                }
+        }
+
+        /* fop/op */
+        ret = glfs_resolve_at (fs, subvol, inode, path, &loc, &iatt,
+                               0 /*TODO: links? */, 0);
+        if (ret != 0)
+                goto out;
+
+        /* populate out args */
+        if (stat != NULL)
+                glfs_iatt_to_stat (fs, &iatt, stat);
+
+        ret = glfs_create_object (&loc, &object);
+
+out:
+        loc_wipe (&loc);
+
+        if (inode != NULL)
+                inode_unref (inode);
+
+        glfs_subvol_done (fs, subvol);
+
+        return object;
+}
+
+/*
+ * Fetch the attributes of object with syncop_stat() and, when stat is
+ * non-NULL, convert them into it.
+ *
+ * Returns the syncop result, or -1 with errno set here to EINVAL (fs or
+ * object is NULL), EIO (no active subvolume) or ESTALE (object could not be
+ * resolved).  GLFS_LOC_FILL_INODE is handed the "out" label as well.
+ */
+int
+glfs_h_stat (struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+        int          ret = -1;
+        xlator_t    *subvol = NULL;
+        inode_t     *inode = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+
+        /* validate in args */
+        if (fs == NULL || object == NULL) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        __glfs_entry_fs (fs);
+
+        /* get the active volume; ret keeps its initial -1 on failure */
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        /* get/refresh the in arg objects inode in correlation to the xlator */
+        inode = glfs_resolve_inode (fs, subvol, object);
+        if (inode == NULL) {
+                errno = ESTALE;
+                goto out;
+        }
+
+        /* populate loc */
+        GLFS_LOC_FILL_INODE (inode, loc, out);
+
+        /* fop/op */
+        ret = syncop_stat (subvol, &loc, &iatt);
+
+        /* populate out args */
+        if (ret == 0 && stat != NULL)
+                glfs_iatt_to_stat (fs, &iatt, stat);
+out:
+        loc_wipe (&loc);
+
+        if (inode != NULL)
+                inode_unref (inode);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+/*
+ * Fetch the attributes of object with glfs_resolve_base() and, when stat is
+ * non-NULL, convert them into it.
+ *
+ * Returns the result of glfs_resolve_base(), or -1 with errno set to EINVAL
+ * (fs or object is NULL), EIO (no active subvolume) or ESTALE (object could
+ * not be resolved).
+ */
+int
+glfs_h_getattrs (struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+        int          ret = 0;
+        xlator_t    *subvol = NULL;
+        inode_t     *inode = NULL;
+        struct iatt  iatt = {0, };
+
+        /* validate in args */
+        if ((fs == NULL) || (object == NULL)) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        __glfs_entry_fs (fs);
+
+        /* get the active volume */
+        subvol = glfs_active_subvol (fs);
+        if (!subvol) {
+                ret = -1;
+                errno = EIO;
+                goto out;
+        }
+
+        /* get/refresh the in arg objects inode in correlation to the xlator */
+        inode = glfs_resolve_inode (fs, subvol, object);
+        if (!inode) {
+                /* ret starts at 0 here, so it must be set explicitly:
+                   otherwise a stale handle would be reported as success
+                   with stat left untouched */
+                ret = -1;
+                errno = ESTALE;
+                goto out;
+        }
+
+        /* fop/op */
+        ret = glfs_resolve_base (fs, subvol, inode, &iatt);
+
+        /* populate out args */
+        if (!ret && stat) {
+                glfs_iatt_to_stat (fs, &iatt, stat);
+        }
+
+out:
+        if (inode)
+                inode_unref (inode);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+/*
+ * Apply the attributes of stat selected by the GFAPI_SET_ATTR_* bits in
+ * valid to object through syncop_setattr().
+ *
+ * Returns the syncop result, or -1 with errno set here to EINVAL (fs,
+ * object or stat is NULL), EIO (no active subvolume) or ESTALE (object
+ * could not be resolved).  GLFS_LOC_FILL_INODE is handed the "out" label as
+ * well.
+ */
+int
+glfs_h_setattrs (struct glfs *fs, struct glfs_object *object, struct stat *stat,
+                 int valid)
+{
+        int          ret = -1;
+        int          glvalid = 0;
+        xlator_t    *subvol = NULL;
+        inode_t     *inode = NULL;
+        loc_t        loc = {0, };
+        struct iatt  iatt = {0, };
+
+        /* validate in args */
+        if (fs == NULL || object == NULL || stat == NULL) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        __glfs_entry_fs (fs);
+
+        /* get the active volume; ret keeps its initial -1 on failure */
+        subvol = glfs_active_subvol (fs);
+        if (subvol == NULL) {
+                errno = EIO;
+                goto out;
+        }
+
+        /* get/refresh the in arg objects inode in correlation to the xlator */
+        inode = glfs_resolve_inode (fs, subvol, object);
+        if (inode == NULL) {
+                errno = ESTALE;
+                goto out;
+        }
+
+        /* map valid masks from in args */
+        glfs_iatt_from_stat (stat, valid, &iatt, &glvalid);
+
+        /* populate loc */
+        GLFS_LOC_FILL_INODE (inode, loc, out);
+
+        /* fop/op */
+        ret = syncop_setattr (subvol, &loc, &iatt, glvalid, 0, 0);
+out:
+        loc_wipe (&loc);
+
+        if (inode != NULL)
+                inode_unref (inode);
+
+        glfs_subvol_done (fs, subvol);
+
+        return ret;
+}
+
+/*
+ * Open the regular file identified by object and return a file handle.
+ *
+ * The object is resolved to an inode, checked to be a regular file, and an
+ * fd created on it is opened with syncop_open() using flags.  On success
+ * the flags are recorded on the fd and the handle is bound.
+ *
+ * Returns the new glfs_fd, or NULL on failure.  errno is set here to EINVAL
+ * (fs or object is NULL, or the inode is neither a directory nor a regular
+ * file), EIO (no active subvolume), ESTALE (object could not be resolved),
+ * EISDIR (the inode is a directory) or ENOMEM (handle or fd could not be
+ * created).
+ */
+struct glfs_fd *
+glfs_h_open (struct glfs *fs, struct glfs_object *object, int flags)
+{
+        int             ret = -1;
+        struct glfs_fd *glfd = NULL;
+        xlator_t       *subvol = NULL;
+        inode_t        *inode = NULL;
+        loc_t           loc = {0, };
+
+        /* validate in args */
+        if ((fs == NULL) || (object == NULL)) {
+                errno = EINVAL;
+                return NULL;
+        }
+
+        __glfs_entry_fs (fs);
+
+        /* get the active volume */
+        subvol = glfs_active_subvol (fs);
+        if (!subvol) {
+                errno = EIO;
+                goto out;
+        }
+
+        /* get/refresh the in arg objects inode in correlation to the xlator */
+        inode = glfs_resolve_inode (fs, subvol, object);
+        if (!inode) {
+                errno = ESTALE;
+                goto out;
+        }
+
+        /* check types to open */
+        if (IA_ISDIR (inode->ia_type)) {
+                ret = -1;
+                errno = EISDIR;
+                goto out;
+        }
+
+        if (!IA_ISREG (inode->ia_type)) {
+                ret = -1;
+                errno = EINVAL;
+                goto out;
+        }
+
+        glfd = glfs_fd_new (fs);
+        if (!glfd) {
+                errno = ENOMEM;
+                goto out;
+        }
+
+        glfd->fd = fd_create (inode, getpid());
+        if (!glfd->fd) {
+                ret = -1;
+                errno = ENOMEM;
+                goto out;
+        }
+
+        /* populate loc */
+        GLFS_LOC_FILL_INODE (inode, loc, out);
+
+        /* fop/op */
+        ret = syncop_open (subvol, &loc, flags, glfd->fd);
+
+out:
+        loc_wipe (&loc);
+
+        if (inode)
+                inode_unref (inode);
+
+        if (ret && glfd) {
+                glfs_fd_destroy (glfd);
+                glfd = NULL;
+        } else if (glfd) {
+                /* Bind only a handle that exists: every failure before
+                   glfs_fd_new() arrives here with glfd == NULL and must
+                   not dereference it. */
+                glfd->fd->flags = flags;
+                fd_bind (glfd->fd);
+                glfs_fd_bind (glfd);
+        }
+
+        glfs_subvol_done (fs, subvol);
+
+        return glfd;
+}
+
+/*
+ * Create a file named path under the directory identified by parent and
+ * return an object handle for it.
+ *
+ * A freshly generated gfid is passed to syncop_create() under the key
+ * "gfid-req" in xattr_req.  When the create returns 0 the new inode is
+ * linked with glfs_loc_link(), stat (if non-NULL) receives the attributes,
+ * and the handle is built with glfs_create_object().
+ *
+ * The glfs_fd used for the create is always destroyed before returning;
+ * only the object handle is handed back.
+ *
+ * Returns the handle, or NULL on failure.  errno is set here to EINVAL (fs,
+ * parent or path is NULL), EIO (no active subvolume), ESTALE (parent could
+ * not be resolved) or ENOMEM (dict or fd could not be set up).
+ */
+struct glfs_object *
+glfs_h_creat (struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* gfid is a local array stored by dict_set_static_bin(); xattr_req is
+ unreferenced at out, before this function returns */
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* NOTE(review): macro defined elsewhere; it is handed ret, errno and
+ the out label, so it presumably reports failure through them -
+ confirm against its definition */
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ glfd->fd = fd_create (loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* fop/op */
+ ret = syncop_create (subvol, &loc, flags, mode, glfd->fd,
+ xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ /* TODO: If the inode existed in the cache (say file already
+ exists), then the glfs_loc_link will not update the
+ loc.inode, as a result we will have a 0000 GFID that we
+ would copy out to the object, this needs to be fixed.
+ */
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ /* a handle that was built although ret reports failure is closed again */
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ /* the fd was only needed for the create call; it is never returned */
+ if (glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+/*
+ * Create the directory @path under @parent and return a handle for it.
+ *
+ * @fs: glfs instance to operate on
+ * @parent: handle of the directory to create in
+ * @path: name of the new directory
+ * @mode: passed through to syncop_mkdir
+ * @stat: optional; filled from the iatt returned by the mkdir when non-NULL
+ *
+ * Returns the new glfs_object, or NULL on failure (errno is EINVAL for NULL
+ * arguments, EIO without an active subvolume, ESTALE when @parent cannot be
+ * resolved, ENOMEM on allocation failures).
+ */
+struct glfs_object *
+glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* request a freshly generated gfid for the new entry */
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* new inode in loc, parent/name set; jumps to out on failure */
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mkdir (subvol, &loc, mode, xattr_req, &iatt);
+
+ /* populate out args */
+ if ( ret == 0 ) {
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ /* never hand back an object together with a failure */
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+/*
+ * Create the node @path under @parent and return a handle for it.
+ *
+ * @fs: glfs instance to operate on
+ * @parent: handle of the directory to create in
+ * @path: name of the new entry
+ * @mode, @dev: passed through to syncop_mknod
+ * @stat: optional; filled from the iatt returned by the mknod when non-NULL
+ *
+ * Returns the new glfs_object, or NULL on failure (errno is EINVAL for NULL
+ * arguments, EIO without an active subvolume, ESTALE when @parent cannot be
+ * resolved, ENOMEM on allocation failures).
+ */
+struct glfs_object *
+glfs_h_mknod (struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, dev_t dev, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* request a freshly generated gfid for the new entry */
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* new inode in loc, parent/name set; jumps to out on failure */
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mknod (subvol, &loc, mode, dev, xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+out:
+ /* never hand back an object together with a failure */
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+/*
+ * Remove the entry @path found under the directory @parent. Directories
+ * are removed with syncop_rmdir, everything else with syncop_unlink; on
+ * success the entry is also dropped via glfs_loc_unlink.
+ *
+ * Returns 0 on success, non-zero on failure (errno is EINVAL for NULL
+ * arguments, EIO without an active subvolume, ESTALE when @parent cannot
+ * be resolved).
+ */
+int
+glfs_h_unlink (struct glfs *fs, struct glfs_object *parent, const char *path)
+{
+ loc_t target = {0, };
+ inode_t *pinode = NULL;
+ xlator_t *subvol = NULL;
+ int ret = -1;
+
+ /* validate in args */
+ if (!fs || !parent || !path) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (subvol == NULL) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the parent inode in correlation to the xlator */
+ pinode = glfs_resolve_inode (fs, subvol, parent);
+ if (pinode == NULL) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* look the entry up relative to the parent */
+ ret = glfs_resolve_at (fs, subvol, pinode, path, &target, NULL, 0, 0);
+ if (ret)
+ goto out;
+
+ /* fop/op: pick the removal call by object type */
+ if (IA_ISDIR(target.inode->ia_type))
+ ret = syncop_rmdir (subvol, &target);
+ else
+ ret = syncop_unlink (subvol, &target);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_loc_unlink (&target);
+
+out:
+ loc_wipe (&target);
+
+ if (pinode)
+ inode_unref (pinode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Open the directory behind @object and return an fd for it.
+ *
+ * @fs: glfs instance to operate on
+ * @object: handle of the directory to open
+ *
+ * Returns a bound glfs_fd on success, NULL on failure. errno is set to
+ * EINVAL (NULL argument), EIO (no active subvolume), ESTALE (object could
+ * not be resolved), ENOTDIR (object is not a directory) or ENOMEM
+ * (fd_create failed).
+ */
+struct glfs_fd *
+glfs_h_opendir (struct glfs *fs, struct glfs_object *object)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (!IA_ISDIR (inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfd = glfs_fd_new (fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD (&glfd->entries);
+
+ glfd->fd = fd_create (inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* populate loc (sets ret and jumps to out on failure) */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_opendir (subvol, &loc, glfd->fd);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (ret && glfd) {
+ glfs_fd_destroy (glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ /* glfd is still NULL for every failure that happens before
+ * (or in) glfs_fd_new(); only bind a really opened fd */
+ fd_bind (glfd->fd);
+ glfs_fd_bind (glfd);
+ }
+
+ glfs_subvol_done (fs, subvol);
+
+ return glfd;
+}
+
+/*
+ * Copy the opaque handle of @object into @handle.
+ *
+ * @object: object whose handle is wanted
+ * @handle: destination buffer; may be NULL to query the required size
+ * @len: size of @handle in bytes; 0 also queries the required size
+ *
+ * Returns GFAPI_HANDLE_LENGTH when the handle was copied or when only the
+ * size was queried. Returns -1 with errno EINVAL for a NULL object, or
+ * with errno ERANGE when @len is smaller than GFAPI_HANDLE_LENGTH.
+ */
+ssize_t
+glfs_h_extract_handle (struct glfs_object *object, unsigned char *handle,
+ int len)
+{
+ /* validate in args */
+ if (object == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* size query: nothing to copy into */
+ if (handle == NULL || len == 0)
+ return GFAPI_HANDLE_LENGTH;
+
+ /* caller's buffer cannot hold a full handle */
+ if (len < GFAPI_HANDLE_LENGTH) {
+ errno = ERANGE;
+ return -1;
+ }
+
+ memcpy (handle, object->gfid, GFAPI_HANDLE_LENGTH);
+
+ return GFAPI_HANDLE_LENGTH;
+}
+
+/*
+ * Build an object handle from an opaque handle previously obtained with
+ * glfs_h_extract_handle.
+ *
+ * @fs: glfs instance to operate on
+ * @handle: GFAPI_HANDLE_LENGTH bytes identifying the object
+ * @len: must be exactly GFAPI_HANDLE_LENGTH
+ * @stat: optional; filled from the looked up iatt when non-NULL
+ *
+ * The inode is taken from the inode table when already known, otherwise a
+ * new one is created; it is then refreshed with a lookup and linked.
+ *
+ * Returns the new glfs_object, or NULL on failure (errno is EINVAL for bad
+ * arguments or a failed link, EIO without an active subvolume, ENOMEM on
+ * allocation failures).
+ */
+struct glfs_object *
+glfs_h_create_from_handle (struct glfs *fs, unsigned char *handle, int len,
+ struct stat *stat)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ struct iatt iatt = {0, };
+ inode_t *newinode = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (handle == NULL) || (len != GFAPI_HANDLE_LENGTH)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ memcpy (loc.gfid, handle, GFAPI_HANDLE_LENGTH);
+
+ /* either way loc.inode owns the inode and loc_wipe releases it */
+ newinode = inode_find (subvol->itable, loc.gfid);
+ if (newinode)
+ loc.inode = newinode;
+ else {
+ loc.inode = inode_new (subvol->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup (subvol, &loc, 0, &iatt, 0, 0);
+ if (ret) {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode refresh of %s failed: %s",
+ uuid_utoa (loc.gfid), strerror (errno));
+ goto out;
+ }
+
+ newinode = inode_link (loc.inode, 0, 0, &iatt);
+ if (newinode)
+ inode_lookup (newinode);
+ else {
+ gf_log (subvol->name, GF_LOG_WARNING,
+ "inode linking of %s failed: %s",
+ uuid_utoa (loc.gfid), strerror (errno));
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* populate stat */
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ object = GF_CALLOC (1, sizeof(struct glfs_object),
+ glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ /* no object will take over the reference held in newinode
+ * (glfs_h_close is what normally releases it), so drop it
+ * here instead of leaking it */
+ inode_unref (newinode);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* populate the return object; it now owns newinode's reference */
+ object->inode = newinode;
+ uuid_copy (object->gfid, object->inode->gfid);
+
+out:
+ /* loc_wipe only releases what loc itself holds (loc.inode); the
+ * linked inode's reference stays with the returned object */
+ loc_wipe (&loc);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+/*
+ * Release an object handle: drops the inode reference held by @object and
+ * frees the handle itself. @object must not be used afterwards.
+ *
+ * Returns 0 on success, -1 with errno EINVAL when @object is NULL.
+ */
+int
+glfs_h_close (struct glfs_object *object)
+{
+ /* validate in args, like the other glfs_h_* entry points */
+ if (object == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* Release the held reference */
+ inode_unref (object->inode);
+ GF_FREE (object);
+
+ return 0;
+}
+
+/*
+ * Truncate the file behind @object to @offset bytes.
+ *
+ * Returns 0 on success, non-zero on failure (errno is EINVAL for NULL
+ * arguments, EIO without an active subvolume, ESTALE when @object cannot
+ * be resolved).
+ */
+int
+glfs_h_truncate (struct glfs *fs, struct glfs_object *object, off_t offset)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc (sets ret and jumps to out on failure) */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_truncate (subvol, &loc, (off_t)offset);
+
+ /* NOTE(review): a successful truncate is followed by
+ * glfs_loc_unlink, the same call glfs_h_unlink makes after removing
+ * an entry, and there are no out args to populate here. This looks
+ * unintended - confirm against glfs_loc_unlink before relying on it.
+ */
+ if (ret == 0)
+ ret = glfs_loc_unlink (&loc);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Create the symbolic link @name under @parent with @data as its target
+ * and return a handle for the new link.
+ *
+ * @fs: glfs instance to operate on
+ * @parent: handle of the directory to create in
+ * @name: name of the new link
+ * @data: link contents, passed through to syncop_symlink
+ * @stat: optional; filled from the iatt returned by the fop when non-NULL
+ *
+ * Returns the new glfs_object, or NULL on failure (errno is EINVAL for NULL
+ * arguments, EIO without an active subvolume, ESTALE when @parent cannot be
+ * resolved, ENOMEM on allocation failures).
+ */
+struct glfs_object *
+glfs_h_symlink (struct glfs *fs, struct glfs_object *parent, const char *name,
+ const char *data, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {0, };
+ struct iatt iatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (name == NULL) ||
+ (data == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* request a freshly generated gfid for the new entry */
+ uuid_generate (gfid);
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* new inode in loc, parent/name set; jumps to out on failure */
+ GLFS_LOC_FILL_PINODE (inode, loc, ret, errno, out, name);
+
+ /* fop/op */
+ ret = syncop_symlink (subvol, &loc, data, xattr_req, &iatt);
+
+ /* populate out args */
+ if (ret == 0) {
+ /* TODO: If the inode existed in the cache (say file already
+ * exists), then the glfs_loc_link will not update the
+ * loc.inode, as a result we will have a 0000 GFID that we
+ * would copy out to the object, this needs to be fixed.
+ */
+ ret = glfs_loc_link (&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat (fs, &iatt, stat);
+
+ ret = glfs_create_object (&loc, &object);
+ }
+
+out:
+ /* never hand back an object together with a failure */
+ if (ret && object != NULL) {
+ glfs_h_close (object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ glfs_subvol_done (fs, subvol);
+
+ return object;
+}
+
+/*
+ * Read the contents of the symbolic link behind @object into @buf.
+ *
+ * @fs: glfs instance to operate on
+ * @object: handle of the link to read
+ * @buf: destination buffer
+ * @bufsiz: size passed to syncop_readlink
+ *
+ * Returns the value returned by syncop_readlink; when it is positive that
+ * many bytes are copied into @buf (no terminating NUL is added here).
+ * Returns -1 with errno EINVAL for NULL arguments, EIO without an active
+ * subvolume, ESTALE when @object cannot be resolved.
+ */
+int
+glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz)
+{
+ loc_t loc = {0, };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ char *linkval = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (buf == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc (sets ret and jumps to out on failure) */
+ GLFS_LOC_FILL_INODE (inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_readlink (subvol, &loc, &linkval, bufsiz);
+
+ /* populate out args */
+ if (ret > 0)
+ memcpy (buf, linkval, ret);
+
+out:
+ loc_wipe (&loc);
+
+ if (inode)
+ inode_unref (inode);
+
+ /* linkval was handed out by syncop_readlink and is freed here */
+ if (linkval)
+ GF_FREE (linkval);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Create a hard link called @name under the directory @parent that refers
+ * to the object @linksrc.
+ *
+ * Returns 0 on success, non-zero on failure (errno is EINVAL for NULL
+ * arguments, EIO without an active subvolume, ESTALE when either object
+ * cannot be resolved, EISDIR when @linksrc is a directory).
+ */
+int
+glfs_h_link (struct glfs *fs, struct glfs_object *linksrc,
+ struct glfs_object *parent, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ inode_t *pinode = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (linksrc == NULL) || (parent == NULL) ||
+ (name == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode (fs, subvol, linksrc);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* directories are rejected as link source */
+ if (inode->ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ /* oldloc describes the existing object (jumps to out on failure) */
+ GLFS_LOC_FILL_INODE (inode, oldloc, out);
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ pinode = glfs_resolve_inode (fs, subvol, parent);
+ if (!pinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* setup newloc based on parent */
+ newloc.parent = inode_ref (pinode);
+ newloc.name = name;
+ ret = glfs_loc_touchup (&newloc);
+ if (ret != 0) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ * original file
+ */
+ newloc.inode = inode_ref (inode);
+
+ /* fop/op */
+ ret = syncop_link (subvol, &oldloc, &newloc);
+
+ if (ret == 0)
+ /* TODO: No iatt to pass as there has been no lookup */
+ ret = glfs_loc_link (&newloc, NULL);
+out:
+ /* the locs release the extra references taken for them above; the
+ * references obtained from glfs_resolve_inode are dropped below */
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ if (inode)
+ inode_unref (inode);
+
+ if (pinode)
+ inode_unref (pinode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
+
+/*
+ * Rename the entry @oldname in directory @olddir to @newname in directory
+ * @newdir.
+ *
+ * Returns 0 on success, non-zero on failure (errno is EINVAL for NULL
+ * arguments, EIO without an active subvolume, ESTALE when a directory
+ * object cannot be resolved, EISDIR when exactly one of source and an
+ * existing destination is a directory).
+ */
+int
+glfs_h_rename (struct glfs *fs, struct glfs_object *olddir, const char *oldname,
+ struct glfs_object *newdir, const char *newname)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *oldpinode = NULL;
+ inode_t *newpinode = NULL;
+ loc_t oldloc = {0, };
+ loc_t newloc = {0, };
+ struct iatt oldiatt = {0, };
+ struct iatt newiatt = {0, };
+
+ /* validate in args */
+ if ((fs == NULL) || (olddir == NULL) || (oldname == NULL) ||
+ (newdir == NULL) || (newname == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __glfs_entry_fs (fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol (fs);
+ if ( !subvol ) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ oldpinode = glfs_resolve_inode (fs, subvol, olddir);
+ if (!oldpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* the source entry has to exist */
+ ret = glfs_resolve_at (fs, subvol, oldpinode, oldname, &oldloc,
+ &oldiatt, 0 , 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ newpinode = glfs_resolve_inode (fs, subvol, newdir);
+ if (!newpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at (fs, subvol, newpinode, newname, &newloc,
+ &newiatt, 0, 0);
+
+ /* give up only when the lookup failed, errno is not ENOENT and
+ * newloc.parent has been filled in; other outcomes fall through */
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ /* newiatt keeps its zero-initialised type unless the lookup filled
+ * it in, i.e. the destination already exists */
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) !=
+ (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename (subvol, &oldloc, &newloc);
+
+ /* mirror the successful rename in the inode table; the return value
+ * of inode_rename is not checked */
+ if (ret == 0)
+ inode_rename (oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode,
+ &oldiatt);
+
+out:
+ loc_wipe (&oldloc);
+ loc_wipe (&newloc);
+
+ if (oldpinode)
+ inode_unref (oldpinode);
+
+ if (newpinode)
+ inode_unref (newpinode);
+
+ glfs_subvol_done (fs, subvol);
+
+ return ret;
+}
diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h
new file mode 100644
index 000000000..437f2cbc8
--- /dev/null
+++ b/api/src/glfs-handles.h
@@ -0,0 +1,143 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_HANDLES_H
+#define _GLFS_HANDLES_H
+
+#include "glfs.h"
+
+/* GLFS OBJECT BASED OPERATIONS
+ *
+ * The following APIs are introduced to provide an API framework that can work
+ * with gluster objects (files and directories), instead of absolute paths.
+ *
+ * The following API set can be related to the POSIX *at interfaces (like
+ * openat (2)). The intention of these APIs is to be able to operate based
+ * on parent object and looking up or creating child objects within, OR to be
+ * used on the actual object thus looked up or created, and retrieve information
+ * regarding the same.
+ *
+ * The APIs also provide for generating an opaque invariant handle to the
+ * object, that can later be used to lookup the object, instead of the regular
+ * glfs_h_* variants. The APIs that provide this behaviour are,
+ * glfs_h_extract_handle and glfs_h_create_from_handle.
+ *
+ * The object handles can be transitioned to fd based operations as supported
+ * by glfs.h calls, using the glfs_h_open call. This provides a way to move
+ * from objects to fd's akin to moving from path to fd for required operations.
+ *
+ * NOTE: The opaque invariant handle is the GFID of the object in reality, but
+ * maintained as an opaque data value, for potential internal changes to the
+ * same without impacting the caller.
+ *
+ * NOTE: Currently looking up an object can create multiple object handles to
+ * the same, i.e distinct glfs_object *. Hence each such looked up or received
+ * handle from other calls, would need to be closed. In the future, for a given
+ * object these pointers would be the same, and an ease of use API to forget all
+ * instances of this object would be provided (instead of a per lookup close).
+ * This should not change the APIs in their current form.
+ *
+ */
+
+/* Values for valid flags to be used when using XXXsetattr, to set multiple
+ attribute values passed via the related stat structure.
+ */
+#define GFAPI_SET_ATTR_MODE 0x1
+#define GFAPI_SET_ATTR_UID 0x2
+#define GFAPI_SET_ATTR_GID 0x4
+#define GFAPI_SET_ATTR_SIZE 0x8
+#define GFAPI_SET_ATTR_ATIME 0x10
+#define GFAPI_SET_ATTR_MTIME 0x20
+
+/* Handle length for object handles returned from glfs_h_extract_handle or
+ * glfs_h_create_from_handle */
+#define GFAPI_HANDLE_LENGTH 16
+
+__BEGIN_DECLS
+
+/*
+ * Notes:
+ *
+ * The file object handle. One per looked up, created file/directory
+ *
+ * This had been introduced to facilitate gfid/inode based gfapi
+ * - a requirement introduced by nfs-ganesha
+ */
+struct glfs_object;
+typedef struct glfs_object glfs_object_t;
+
+/* Handle based operations */
+/* Operations that generate handles */
+struct glfs_object *glfs_h_lookupat (struct glfs *fs,
+ struct glfs_object *parent,
+ const char *path, struct stat *stat);
+
+struct glfs_object *glfs_h_creat (struct glfs *fs, struct glfs_object *parent,
+ const char *path, int flags, mode_t mode,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_mkdir (struct glfs *fs, struct glfs_object *parent,
+ const char *path, mode_t flags,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_mknod (struct glfs *fs, struct glfs_object *parent,
+ const char *path, mode_t mode, dev_t dev,
+ struct stat *sb);
+
+struct glfs_object *glfs_h_symlink (struct glfs *fs, struct glfs_object *parent,
+ const char *name, const char *data,
+ struct stat *stat);
+
+/* Operations on the actual objects */
+int glfs_h_unlink (struct glfs *fs, struct glfs_object *parent,
+ const char *path);
+
+int glfs_h_close (struct glfs_object *object);
+
+int glfs_caller_specific_init (void *uid_caller_key, void *gid_caller_key,
+ void *future);
+
+int glfs_h_truncate (struct glfs *fs, struct glfs_object *object, off_t offset);
+
+int glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat);
+
+int glfs_h_getattrs (struct glfs *fs, struct glfs_object *object,
+ struct stat *stat);
+
+int glfs_h_setattrs (struct glfs *fs, struct glfs_object *object,
+ struct stat *sb, int valid);
+
+int glfs_h_readlink (struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz);
+
+int glfs_h_link (struct glfs *fs, struct glfs_object *linktgt,
+ struct glfs_object *parent, const char *name);
+
+int glfs_h_rename (struct glfs *fs, struct glfs_object *olddir,
+ const char *oldname, struct glfs_object *newdir,
+ const char *newname);
+
+/* Operations enabling opaque invariant handle to object transitions */
+ssize_t glfs_h_extract_handle (struct glfs_object *object,
+ unsigned char *handle, int len);
+
+struct glfs_object *glfs_h_create_from_handle (struct glfs *fs,
+ unsigned char *handle, int len,
+ struct stat *stat);
+
+/* Operations enabling object handles to fd transitions */
+struct glfs_fd *glfs_h_opendir (struct glfs *fs, struct glfs_object *object);
+
+struct glfs_fd *glfs_h_open (struct glfs *fs, struct glfs_object *object,
+ int flags);
+
+__END_DECLS
+
+#endif /* !_GLFS_HANDLES_H */ \ No newline at end of file
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
new file mode 100644
index 000000000..ec1d5579d
--- /dev/null
+++ b/api/src/glfs-internal.h
@@ -0,0 +1,200 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _GLFS_INTERNAL_H
+#define _GLFS_INTERNAL_H
+
+#include "xlator.h"
+
+#define GLFS_SYMLINK_MAX_FOLLOW 2048
+
+#define DEFAULT_REVAL_COUNT 1
+
+/*
+ * Retry helper: when ret is -1 and errno is ESTALE, and fewer than
+ * DEFAULT_REVAL_COUNT retries have been made, bump reval, wipe the loc
+ * (loc is a pointer here) and jump to label. Does nothing otherwise.
+ * NOTE(review): the arguments are expanded without parentheses and
+ * several times, so pass plain variables only.
+ */
+#define ESTALE_RETRY(ret,errno,reval,loc,label) do { \
+ if (ret == -1 && errno == ESTALE) { \
+ if (reval < DEFAULT_REVAL_COUNT) { \
+ reval++; \
+ loc_wipe (loc); \
+ goto label; \
+ } \
+ } \
+ } while (0)
+
+/*
+ * Fill loc (a loc_t variable, not a pointer) so that it refers to the
+ * existing inode oinode: takes a reference on oinode into loc.inode,
+ * copies its gfid and calls glfs_loc_touchup. On touchup failure errno is
+ * set to EINVAL and control jumps to label.
+ * The macro assigns to a variable named `ret` that must exist in the
+ * calling scope; it is not a macro parameter.
+ */
+#define GLFS_LOC_FILL_INODE(oinode, loc, label) do { \
+ loc.inode = inode_ref (oinode); \
+ uuid_copy (loc.gfid, oinode->gfid); \
+ ret = glfs_loc_touchup (&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+/*
+ * Fill loc (a loc_t variable, not a pointer) for an entry that is about
+ * to be created under the directory inode pinode: allocates a new inode
+ * from pinode's table into loc.inode, takes a reference on pinode into
+ * loc.parent, stores path as loc.name and calls glfs_loc_touchup.
+ * On failure ret/errno are set (-1/ENOMEM for the allocation, EINVAL for
+ * touchup) and control jumps to label. path is stored as-is, not copied.
+ */
+#define GLFS_LOC_FILL_PINODE(pinode, loc, ret, errno, label, path) do { \
+ loc.inode = inode_new (pinode->table); \
+ if (!loc.inode) { \
+ ret = -1; \
+ errno = ENOMEM; \
+ goto label; \
+ } \
+ loc.parent = inode_ref (pinode); \
+ loc.name = path; \
+ ret = glfs_loc_touchup (&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+struct glfs;
+
+typedef int (*glfs_init_cbk) (struct glfs *fs, int ret);
+
+/* Per-instance state of a gfapi filesystem object. */
+struct glfs {
+ char *volname;
+
+ /* ctx->master is installed as THIS by __glfs_entry_fs() */
+ glusterfs_ctx_t *ctx;
+
+ pthread_t poller;
+
+ glfs_init_cbk init_cbk;
+ /* mutex/cond: glfs_lock() takes mutex and waits on cond until init
+ is non-zero and migration_in_progress is clear */
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int init;
+ int ret;
+ int err;
+
+ /* next_subvol is set by glfs_graph_setup() to the top xlator of a
+ newly arrived graph; both pointers are compared there under mutex */
+ xlator_t *active_subvol;
+ xlator_t *next_subvol;
+ xlator_t *old_subvol;
+
+ char *oldvolfile;
+ ssize_t oldvollen;
+
+ inode_t *cwd;
+
+ uint32_t dev_id; /* Used to fill st_dev in struct stat */
+
+ struct list_head openfds;
+
+ /* glfs_lock() blocks while this is set */
+ gf_boolean_t migration_in_progress;
+};
+
+/* An open file or directory as handed out by the gfapi open calls. */
+struct glfs_fd {
+ struct list_head openfds;
+ struct glfs *fs;
+ off_t offset;
+ fd_t *fd; /* Currently guarded by @fs->mutex. TODO: per-glfd lock */
+ /* initialised with INIT_LIST_HEAD by glfs_h_opendir() */
+ struct list_head entries;
+ gf_dirent_t *next;
+ struct dirent *readdirbuf;
+};
+
+/* glfs object handle introduced for the alternate gfapi implementation based
+ on glfs handles/gfid/inode.
+
+ inode: reference held for the lifetime of the handle; released by
+ glfs_h_close()
+ gfid: copied out as the opaque handle by glfs_h_extract_handle()
+*/
+struct glfs_object {
+ inode_t *inode;
+ uuid_t gfid;
+};
+
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+
+int glfs_mgmt_init (struct glfs *fs);
+void glfs_init_done (struct glfs *fs, int ret);
+int glfs_process_volfp (struct glfs *fs, FILE *fp);
+int glfs_resolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+int glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+fd_t *glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+fd_t *__glfs_migrate_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+int glfs_first_lookup (xlator_t *subvol);
+
+/* Make the master xlator of @fs the current THIS; called at the start of
+ the fs based API entry points. @fs must not be NULL. */
+static inline void
+__glfs_entry_fs (struct glfs *fs)
+{
+ THIS = fs->ctx->master;
+}
+
+
+/* Same as __glfs_entry_fs(), but the master xlator is reached through the
+ inode table of the fd's inode. @fd and fd->fd must not be NULL. */
+static inline void
+__glfs_entry_fd (struct glfs_fd *fd)
+{
+ THIS = fd->fd->inode->table->xl->ctx->master;
+}
+
+
+/*
+ By default all lock attempts from user context must
+ use glfs_lock() and glfs_unlock(). This allows
+ for a safe implementation of graph migration where
+ we can give up the mutex during syncop calls so
+ that bottom up calls (particularly CHILD_UP notify)
+ can do a mutex_lock() on @glfs without deadlocking
+ the filesystem
+*/
+/* Acquire @fs->mutex for user context callers. Returns (always 0) with the
+ mutex held, pair with glfs_unlock(). */
+static inline int
+glfs_lock (struct glfs *fs)
+{
+ pthread_mutex_lock (&fs->mutex);
+
+ /* block until initialisation has been signalled */
+ while (!fs->init)
+ pthread_cond_wait (&fs->cond, &fs->mutex);
+
+ /* then block until no graph migration is in flight */
+ while (fs->migration_in_progress)
+ pthread_cond_wait (&fs->cond, &fs->mutex);
+
+ return 0;
+}
+
+
+/* Release @fs->mutex taken with glfs_lock(). */
+static inline void
+glfs_unlock (struct glfs *fs)
+{
+ pthread_mutex_unlock (&fs->mutex);
+}
+
+
+void glfs_fd_destroy (struct glfs_fd *glfd);
+
+struct glfs_fd *glfs_fd_new (struct glfs *fs);
+void glfs_fd_bind (struct glfs_fd *glfd);
+
+xlator_t * glfs_active_subvol (struct glfs *fs);
+xlator_t * __glfs_active_subvol (struct glfs *fs);
+void glfs_subvol_done (struct glfs *fs, xlator_t *subvol);
+
+inode_t * glfs_refresh_inode (xlator_t *subvol, inode_t *inode);
+
+inode_t *glfs_cwd_get (struct glfs *fs);
+int glfs_cwd_set (struct glfs *fs, inode_t *inode);
+inode_t *glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object);
+int glfs_create_object (loc_t *loc, struct glfs_object **retobject);
+int __glfs_cwd_set (struct glfs *fs, inode_t *inode);
+
+int glfs_resolve_base (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt);
+int glfs_resolve_at (struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+ int follow, int reval);
+int glfs_loc_touchup (loc_t *loc);
+void glfs_iatt_to_stat (struct glfs *fs, struct iatt *iatt, struct stat *stat);
+int glfs_loc_link (loc_t *loc, struct iatt *iatt);
+int glfs_loc_unlink (loc_t *loc);
+
+#endif /* !_GLFS_INTERNAL_H */
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
new file mode 100644
index 000000000..c02534c18
--- /dev/null
+++ b/api/src/glfs-master.c
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+
+
+/*
+ * Register the top xlator of @graph as the next subvolume of @fs.
+ *
+ * Events for a graph that is already switched, active or pending are
+ * ignored. Otherwise an inode table is created for the new top xlator if
+ * it has none, and it replaces fs->next_subvol; a previously pending
+ * subvolume that was never picked up is handed to glfs_subvol_done().
+ *
+ * Returns 0 on success or for an ignored event, -1 with errno ENOMEM when
+ * the inode table cannot be allocated.
+ */
+int
+glfs_graph_setup (struct glfs *fs, glusterfs_graph_t *graph)
+{
+ xlator_t *new_subvol = NULL;
+ xlator_t *old_subvol = NULL;
+ inode_table_t *itable = NULL;
+ int ret = -1;
+
+ new_subvol = graph->top;
+
+ /* This is called in a bottom-up context, it should specifically
+ NOT be glfs_lock()
+ */
+ pthread_mutex_lock (&fs->mutex);
+ {
+ if (new_subvol->switched ||
+ new_subvol == fs->active_subvol ||
+ new_subvol == fs->next_subvol) {
+ /* Spurious CHILD_UP event on old graph */
+ ret = 0;
+ goto unlock;
+ }
+
+ if (!new_subvol->itable) {
+ itable = inode_table_new (131072, new_subvol);
+ if (!itable) {
+ errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ new_subvol->itable = itable;
+ }
+
+ /* remember the subvolume being replaced; it is released
+ only after the mutex has been dropped */
+ old_subvol = fs->next_subvol;
+ fs->next_subvol = new_subvol;
+ fs->next_subvol->winds++; /* first ref */
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock (&fs->mutex);
+
+ if (old_subvol)
+ /* wasn't picked up so far, skip */
+ glfs_subvol_done (fs, old_subvol);
+
+ return ret;
+}
+
+
+/*
+ * Event handler of the master xlator. @data is treated as the graph the
+ * event refers to and this->private as the owning struct glfs.
+ *
+ * GRAPH_NEW is only logged. CHILD_UP and CHILD_DOWN both register the
+ * graph through glfs_graph_setup() and then report initialisation as done,
+ * with 0 for CHILD_UP and 1 for CHILD_DOWN. CHILD_CONNECTING is ignored,
+ * any other event is logged at debug level. Always returns 0.
+ */
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ glusterfs_graph_t *graph = data;
+ struct glfs *fs = this->private;
+
+ switch (event) {
+ case GF_EVENT_CHILD_CONNECTING:
+ /* nothing to do */
+ break;
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_CHILD_DOWN:
+ glfs_graph_setup (fs, graph);
+ glfs_init_done (fs, (event == GF_EVENT_CHILD_DOWN) ? 1 : 0);
+ break;
+ case GF_EVENT_GRAPH_NEW:
+ gf_log (this->name, GF_LOG_INFO, "New graph %s (%d) coming up",
+ uuid_utoa ((unsigned char *)graph->graph_uuid),
+ graph->id);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_DEBUG,
+ "got notify event %d", event);
+ break;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Set up memory accounting for this xlator with room for all glfs memory
+ * types (glfs_mt_end + 1 slots).
+ *
+ * Returns 0 on success, -1 when @this is NULL, otherwise the non-zero
+ * result of xlator_mem_acct_init (an error is logged in that case).
+ */
+int
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (this == NULL)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, glfs_mt_end + 1);
+ if (ret == 0)
+ return 0;
+
+ gf_log (this->name, GF_LOG_ERROR, "Failed to initialise "
+ "memory accounting");
+
+ return ret;
+}
+
+
+/* Xlator init hook: nothing to set up here, always returns 0. */
+int
+init (xlator_t *this)
+{
+ return 0;
+}
+
+
+/* Xlator fini hook: intentionally empty. */
+void
+fini (xlator_t *this)
+{
+
+}
+
+
+/* Operation tables of this xlator. They are defined without initialisers,
+ so every member starts out zeroed. (Presumably the xlator loader looks
+ these symbols up by name - confirm against the xlator framework.) */
+struct xlator_dumpops dumpops;
+
+
+struct xlator_fops fops;
+
+
+struct xlator_cbks cbks;
diff --git a/api/src/glfs-mem-types.h b/api/src/glfs-mem-types.h
new file mode 100644
index 000000000..3301b3da5
--- /dev/null
+++ b/api/src/glfs-mem-types.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_MEM_TYPES_H
+#define _GLFS_MEM_TYPES_H
+
+#include "mem-types.h"
+
+/* First memory-type id owned by this module: one past the last common id. */
+#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
+
+/* Memory-accounting type ids used by the allocations in api/src.
+   glfs_mt_end is the terminator; accounting is sized with glfs_mt_end + 1. */
+enum glfs_mem_types_ {
+ glfs_mt_glfs_t = GF_MEM_TYPE_START,
+ glfs_mt_call_pool_t,
+ glfs_mt_xlator_t,
+ glfs_mt_glfs_fd_t,
+ glfs_mt_glfs_io_t,
+ glfs_mt_volfile_t,
+ glfs_mt_xlator_cmdline_option_t,
+ glfs_mt_glfs_object_t,
+ glfs_mt_readdirbuf_t,
+ glfs_mt_end
+
+};
+#endif
+
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
new file mode 100644
index 000000000..6843e9cb3
--- /dev/null
+++ b/api/src/glfs-mgmt.c
@@ -0,0 +1,543 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* _CONFIG_H */
+
+#include "glusterfs.h"
+#include "stack.h"
+#include "dict.h"
+#include "event.h"
+#include "defaults.h"
+
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "glusterfs3.h"
+#include "portmap-xdr.h"
+#include "xdr-generic.h"
+
+#include "syncop.h"
+#include "xlator.h"
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+
+
+int glfs_volfile_fetch (struct glfs *fs);
+
+/*
+  Build a graph from the volfile stream @fp, then prepare and activate it
+  against fs->ctx.
+
+  @fp is closed before returning whenever it is non-NULL, on success and on
+  failure alike, so the caller must not use it afterwards.
+
+  Returns 0 on success and non-zero on failure. The result is forced to -1
+  whenever the context has no active graph on exit.
+*/
+int
+glfs_process_volfp (struct glfs *fs, FILE *fp)
+{
+        glusterfs_graph_t *graph = NULL;
+        int                ret = -1;
+        xlator_t          *trav = NULL;
+        glusterfs_ctx_t   *ctx = NULL;
+
+        ctx = fs->ctx;
+        graph = glusterfs_graph_construct (fp);
+        if (!graph) {
+                gf_log ("glfs", GF_LOG_ERROR, "failed to construct the graph");
+                goto out;
+        }
+
+        for (trav = graph->first; trav; trav = trav->next) {
+                if (strcmp (trav->type, "mount/fuse") == 0) {
+                        gf_log ("glfs", GF_LOG_ERROR,
+                                "fuse xlator cannot be specified "
+                                "in volume file");
+                        /* the rejected graph is dropped here, exactly as
+                           the prepare/activate failure paths do below;
+                           previously it was left allocated */
+                        glusterfs_graph_destroy (graph);
+                        goto out;
+                }
+        }
+
+        ret = glusterfs_graph_prepare (graph, ctx);
+        if (ret) {
+                glusterfs_graph_destroy (graph);
+                goto out;
+        }
+
+        ret = glusterfs_graph_activate (graph, ctx);
+        if (ret) {
+                glusterfs_graph_destroy (graph);
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (fp)
+                fclose (fp);
+
+        /* without an active graph the context is unusable, whatever
+           happened above */
+        if (!ctx->active) {
+                ret = -1;
+        }
+
+        return ret;
+}
+
+
+/*
+  FETCHSPEC callback: re-fetch the volfile for the glfs instance stored in
+  the xlator passed as @mydata. The fetch result is not propagated; the
+  callback always returns 0.
+*/
+int
+mgmt_cbk_spec (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+        xlator_t *xl = mydata;
+
+        glfs_volfile_fetch (xl->private);
+
+        return 0;
+}
+
+
+/* EVENTNOTIFY callback: ignores its arguments and always returns 0. */
+int
+mgmt_cbk_event (struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ return 0;
+}
+
+
+/* Callback actors, indexed by callback procedure number. */
+rpcclnt_cb_actor_t mgmt_cbk_actors[] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec },
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
+ mgmt_cbk_event},
+};
+
+
+/* Callback program registered on the management connection by
+   glfs_mgmt_init(). */
+struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+/* Printable names of the handshake procedures, indexed by procedure number. */
+char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+};
+
+/* Handshake program used by glfs_volfile_fetch() for the GETSPEC request. */
+rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+
+/*
+  XDR-encode @req (when non-NULL) with @xdrproc into a freshly obtained
+  iobuf and submit it on the management connection (ctx->mgmt) as procedure
+  @procnum of @prog; @cbkfn is passed on as the reply callback and @frame as
+  its frame.
+
+  The local iobuf/iobref references are dropped before returning.
+  Returns the result of rpc_clnt_submit(), or -1 if the request could not
+  be prepared.
+*/
+int
+mgmt_submit_request (void *req, call_frame_t *frame,
+                     glusterfs_ctx_t *ctx,
+                     rpc_clnt_prog_t *prog, int procnum,
+                     fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+        struct iobref *refs = NULL;
+        struct iobuf  *buf = NULL;
+        struct iovec   vec = {0, };
+        ssize_t        payload_len = 0;
+        int            nvec = 0;
+        int            ret = -1;
+
+        refs = iobref_new ();
+        if (!refs)
+                goto out;
+
+        if (req) {
+                payload_len = xdr_sizeof (xdrproc, req);
+
+                buf = iobuf_get2 (ctx->iobuf_pool, payload_len);
+                if (!buf)
+                        goto out;
+
+                iobref_add (refs, buf);
+
+                /* offer the whole page to the encoder; the length is
+                   trimmed to the encoded size afterwards */
+                vec.iov_base = buf->ptr;
+                vec.iov_len = iobuf_pagesize (buf);
+
+                ret = xdr_serialize_generic (vec, req, xdrproc);
+                if (ret == -1) {
+                        gf_log (THIS->name, GF_LOG_WARNING,
+                                "failed to create XDR payload");
+                        goto out;
+                }
+
+                vec.iov_len = ret;
+                nvec = 1;
+        }
+
+        ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn,
+                               &vec, nvec,
+                               NULL, 0, refs, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+        if (refs)
+                iobref_unref (refs);
+
+        if (buf)
+                iobuf_unref (buf);
+
+        return ret;
+}
+
+
+/*
+  Replace the cached volfile (fs->oldvolfile / fs->oldvollen) with a copy of
+  the @size bytes at @volfile. The copy is NUL-terminated.
+
+  Returns 0 on success. On allocation failure returns -1 and leaves the
+  cache empty (fs->oldvolfile == NULL, fs->oldvollen == 0).
+*/
+static int
+glusterfs_oldvolfile_update (struct glfs *fs, char *volfile, ssize_t size)
+{
+        char *buf = NULL;
+
+        if (!fs->oldvolfile) {
+                buf = GF_CALLOC (1, size+1, glfs_mt_volfile_t);
+        } else {
+                /* Never assign the result straight back to fs->oldvolfile:
+                   assumes GF_REALLOC follows realloc() and leaves the old
+                   buffer allocated on failure, so overwriting the only
+                   pointer to it would leak it. */
+                buf = GF_REALLOC (fs->oldvolfile, size+1);
+                if (!buf)
+                        GF_FREE (fs->oldvolfile);
+        }
+
+        if (!buf) {
+                fs->oldvolfile = NULL;
+                fs->oldvollen = 0;
+                return -1;
+        }
+
+        memcpy (buf, volfile, size);
+        /* a reallocated buffer is not zero-filled, terminate explicitly */
+        buf[size] = '\0';
+
+        fs->oldvolfile = buf;
+        fs->oldvollen = size;
+
+        return 0;
+}
+
+
+/*
+ RPC reply handler for the GETSPEC request submitted by glfs_volfile_fetch().
+
+ Decodes the reply and, if the returned volfile differs from the cached copy
+ (fs->oldvolfile), first tries glusterfs_volfile_reconfigure(); when that
+ asks for a new graph (return value 1) the volfile is handed to
+ glfs_process_volfp(). On success the cached copy is refreshed.
+
+ The call frame is always destroyed here. While the context has no active
+ graph, a failure is reported through glfs_init_done (fs, -1), except for an
+ RPC-level failure (rpc_status == -1), which only sets need_retry.
+
+ Always returns 0.
+*/
+int
+mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {0,};
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ int need_retry = 0;
+ struct glfs *fs = NULL;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+ fs = ((xlator_t *)ctx->master)->private;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ need_retry = 1;
+ goto out;
+ }
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to get the 'volume file' from server");
+ ret = -1;
+ errno = rsp.op_errno;
+ goto out;
+ }
+
+ ret = 0;
+ /* on success op_ret is used as the length of rsp.spec */
+ size = rsp.op_ret;
+
+ if ((size == fs->oldvollen) &&
+ (memcmp (fs->oldvolfile, rsp.spec, size) == 0)) {
+ gf_log (frame->this->name, GF_LOG_INFO,
+ "No change in volfile, continuing");
+ goto out;
+ }
+
+ /* the new volfile is staged in an anonymous temporary file so it can
+ be handed on as a FILE stream */
+ tmpfp = tmpfile ();
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
+ }
+
+ fwrite (rsp.spec, size, 1, tmpfp);
+ fflush (tmpfp);
+ if (ferror (tmpfp)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if only options have changed. No need to reload the
+ * volfile if topology hasn't changed.
+ * glusterfs_volfile_reconfigure returns 3 possible return states
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
+ * return -1(or -ve) =======> Some Internal Error occurred during the operation
+ */
+
+ ret = glusterfs_volfile_reconfigure (fs->oldvollen, tmpfp, fs->ctx,
+ fs->oldvolfile);
+ if (ret == 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "No need to re-load volfile, reconfigure done");
+ ret = glusterfs_oldvolfile_update (fs, rsp.spec, size);
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Reconfigure failed !!");
+ goto out;
+ }
+
+ ret = glfs_process_volfp (fs, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ if (ret)
+ goto out;
+
+ ret = glusterfs_oldvolfile_update (fs, rsp.spec, size);
+out:
+ /* ctx and fs were read from the frame above, so they remain usable
+ after the frame is gone */
+ STACK_DESTROY (frame->root);
+
+ /* released with free(), not GF_FREE; presumably because the XDR
+ decoder allocated it -- TODO confirm */
+ if (rsp.spec)
+ free (rsp.spec);
+
+ // Stop if server is running at an unsupported op-version
+ /* NOTE(review): none of the assignments to ret visible in this function
+ appears to store ENOTSUP, so this branch looks unreachable; a
+ server-side errno arrives in rsp.op_errno and is copied to errno
+ above -- confirm the intended condition. */
+ if (ENOTSUP == ret) {
+ gf_log ("mgmt", GF_LOG_ERROR, "Server is operating at an "
+ "op-version which is not supported");
+ errno = ENOTSUP;
+ glfs_init_done (fs, -1);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ if (!need_retry) {
+ if (!errno)
+ errno = EINVAL;
+ glfs_init_done (fs, -1);
+ }
+ }
+
+ if (tmpfp)
+ fclose (tmpfp);
+
+ return 0;
+}
+
+
+/*
+  Ask the management server for the volfile identified by
+  ctx->cmd_args.volfile_id. The supported min/max op-versions are sent
+  along as serialized xdata; the reply is handled by mgmt_getspec_cbk().
+
+  Returns 0 when the request was submitted, non-zero otherwise.
+*/
+int
+glfs_volfile_fetch (struct glfs *fs)
+{
+        cmd_args_t      *cmd_args = NULL;
+        gf_getspec_req   req = {0, };
+        int              ret = 0;
+        int              submit_tried = 0;
+        call_frame_t    *frame = NULL;
+        glusterfs_ctx_t *ctx = NULL;
+        dict_t          *dict = NULL;
+
+        ctx = fs->ctx;
+        cmd_args = &ctx->cmd_args;
+
+        frame = create_frame (THIS, ctx->pool);
+        if (!frame) {
+                /* the reply callback dereferences the frame, so a request
+                   must never go out without one */
+                ret = -1;
+                goto out;
+        }
+
+        req.key = cmd_args->volfile_id;
+        req.flags = 0;
+
+        dict = dict_new ();
+        if (!dict) {
+                ret = -1;
+                goto out;
+        }
+
+        // Set the supported min and max op-versions, so glusterd can make a
+        // decision
+        ret = dict_set_int32 (dict, "min-op-version", GD_OP_VERSION_MIN);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+                        " in request dict");
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, "max-op-version", GD_OP_VERSION_MAX);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+                        " in request dict");
+                goto out;
+        }
+
+        ret = dict_allocate_and_serialize (dict, &req.xdata.xdata_val,
+                                           &req.xdata.xdata_len);
+        if (ret < 0) {
+                gf_log (THIS->name, GF_LOG_ERROR,
+                        "Failed to serialize dictionary");
+                goto out;
+        }
+
+        /* from here on the frame is handed to the submit path */
+        submit_tried = 1;
+        ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
+                                   GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
+                                   (xdrproc_t)xdr_gf_getspec_req);
+out:
+        /* mgmt_submit_request() encodes the request into its own buffer, so
+           the serialized xdata and the dict are no longer needed.
+           Presumably xdata_val comes from the GF allocator -- TODO confirm. */
+        if (req.xdata.xdata_val)
+                GF_FREE (req.xdata.xdata_val);
+
+        if (dict)
+                dict_unref (dict);
+
+        /* a frame that never reached the submit path has no callback that
+           would destroy it */
+        if (!submit_tried && frame)
+                STACK_DESTROY (frame->root);
+
+        return ret;
+}
+
+
+/*
+ Connection-state handler registered on the management RPC client by
+ glfs_mgmt_init().
+
+ DISCONNECT: while no graph is active, consumes one connect attempt and,
+ once none are left, fails initialisation with errno = ENOTCONN.
+ CONNECT: marks the connection as connected and fetches the volfile; if the
+ fetch cannot be started while no graph is active, initialisation fails with
+ errno = EINVAL.
+
+ Always returns 0.
+*/
+static int
+mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ cmd_args_t *cmd_args = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ struct glfs *fs = NULL;
+ int ret = 0;
+
+ this = mydata;
+ ctx = this->ctx;
+ fs = ((xlator_t *)ctx->master)->private;
+ cmd_args = &ctx->cmd_args;
+
+ switch (event) {
+ case RPC_CLNT_DISCONNECT:
+ /* disconnects only matter until the first graph is active */
+ if (!ctx->active) {
+ cmd_args->max_connect_attempts--;
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to connect with remote-host: %s",
+ strerror (errno));
+ gf_log ("glfs-mgmt", GF_LOG_INFO,
+ "%d connect attempts left",
+ cmd_args->max_connect_attempts);
+ if (0 >= cmd_args->max_connect_attempts) {
+ errno = ENOTCONN;
+ glfs_init_done (fs, -1);
+ }
+ }
+ break;
+ case RPC_CLNT_CONNECT:
+ rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
+
+ ret = glfs_volfile_fetch (fs);
+ if (ret && ctx && (ctx->active == NULL)) {
+ /* Do it only for the first time */
+ /* Exit the process.. there are some wrong options */
+ gf_log ("glfs-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ errno = EINVAL;
+ glfs_init_done (fs, -1);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+/*
+  Context-level notification hook (installed as ctx->notify by
+  glfs_mgmt_init()). No operation is acted upon; always returns 0.
+*/
+int
+glusterfs_mgmt_notify (int32_t op, void *data, ...)
+{
+        switch (op) {
+        case GF_EN_DEFRAG_STATUS:
+                /* recognised, but nothing to do */
+        default:
+                break;
+        }
+
+        return 0;
+}
+
+
+/*
+  Create and start the RPC client used to talk to the management server,
+  unless the context already has one (ctx->mgmt).
+
+  The server defaults to "localhost" and the port to GF_DEFAULT_BASE_PORT
+  when the command-line arguments do not provide them.
+
+  Returns 0 on success (or when a client already exists), non-zero on
+  failure.
+*/
+int
+glfs_mgmt_init (struct glfs *fs)
+{
+        glusterfs_ctx_t *ctx = fs->ctx;
+        cmd_args_t      *args = &ctx->cmd_args;
+        struct rpc_clnt *clnt = NULL;
+        dict_t          *opts = NULL;
+        char            *server = NULL;
+        int              server_port = GF_DEFAULT_BASE_PORT;
+        int              ret = -1;
+
+        if (ctx->mgmt)
+                return 0;
+
+        if (args->volfile_server_port)
+                server_port = args->volfile_server_port;
+
+        server = args->volfile_server ? args->volfile_server : "localhost";
+
+        ret = rpc_transport_inet_options_build (&opts, server, server_port);
+        if (ret)
+                return ret;
+
+        clnt = rpc_clnt_new (opts, THIS->ctx, THIS->name, 8);
+        if (!clnt) {
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "failed to create rpc clnt");
+                return -1;
+        }
+
+        ret = rpc_clnt_register_notify (clnt, mgmt_rpc_notify, THIS);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "failed to register notify function");
+                return ret;
+        }
+
+        ret = rpcclnt_cbk_program_register (clnt, &mgmt_cbk_prog, THIS);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_WARNING,
+                        "failed to register callback function");
+                return ret;
+        }
+
+        ctx->notify = glusterfs_mgmt_notify;
+
+        /* This value should be set before doing the 'rpc_clnt_start()' as
+           the notify function uses this variable */
+        ctx->mgmt = clnt;
+
+        return rpc_clnt_start (clnt);
+}
+
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
new file mode 100644
index 000000000..4ca2eb6fc
--- /dev/null
+++ b/api/src/glfs-resolve.c
@@ -0,0 +1,969 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "stack.h"
+#include "event.h"
+#include "glfs-mem-types.h"
+#include "common-utils.h"
+#include "syncop.h"
+#include "call-stub.h"
+
+#include "glfs-internal.h"
+
+/* Printable UUID of the graph @subvol belongs to. The argument is
+   parenthesized so that any pointer-valued expression may be passed. */
+#define graphid_str(subvol) (uuid_utoa((unsigned char *)(subvol)->graph->graph_uuid))
+
+
+/*
+  Look up "/" on @subvol, using the root inode of its inode table and a
+  gfid whose only non-zero byte is the last one (value 1).
+
+  Returns the result of syncop_lookup().
+*/
+int
+glfs_first_lookup_safe (xlator_t *subvol)
+{
+        loc_t root = {0, };
+        int   rc = -1;
+
+        root.path = "/";
+        root.name = "";
+        root.inode = subvol->itable->root;
+
+        memset (root.gfid, 0, 16);
+        root.gfid[15] = 1;
+
+        rc = syncop_lookup (subvol, &root, 0, 0, 0, 0);
+
+        gf_log (subvol->name, GF_LOG_DEBUG, "first lookup complete %d", rc);
+
+        return rc;
+}
+
+
+/*
+ Run glfs_first_lookup_safe() on @subvol with fs->mutex released.
+
+ Expects fs->mutex to be held on entry: it is unlocked around the lookup
+ and re-locked before returning. fs->migration_in_progress is set while
+ the mutex is dropped, and fs->cond is broadcast once it is cleared again.
+
+ Returns the lookup result.
+*/
+int
+__glfs_first_lookup (struct glfs *fs, xlator_t *subvol)
+{
+ int ret = -1;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ ret = glfs_first_lookup_safe (subvol);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return ret;
+}
+
+
+/*
+  Return a referenced inode in @subvol's inode table that carries the same
+  gfid as @oldinode.
+
+  If @oldinode already belongs to @subvol a new reference to it is
+  returned; otherwise the table is searched by gfid and, failing that, the
+  gfid is looked up on @subvol and the result linked into the table.
+
+  Returns NULL when @oldinode is NULL or the inode cannot be obtained.
+*/
+inode_t *
+glfs_refresh_inode_safe (xlator_t *subvol, inode_t *oldinode)
+{
+        loc_t        lookup_loc = {0, };
+        struct iatt  stbuf = {0, };
+        inode_t     *fresh = NULL;
+
+        if (!oldinode)
+                return NULL;
+
+        if (oldinode->table->xl == subvol)
+                return inode_ref (oldinode);
+
+        fresh = inode_find (subvol->itable, oldinode->gfid);
+        if (fresh)
+                return fresh;
+
+        uuid_copy (lookup_loc.gfid, oldinode->gfid);
+        lookup_loc.inode = inode_new (subvol->itable);
+        if (!lookup_loc.inode)
+                return NULL;
+
+        if (syncop_lookup (subvol, &lookup_loc, 0, &stbuf, 0, 0)) {
+                gf_log (subvol->name, GF_LOG_WARNING,
+                        "inode refresh of %s failed: %s",
+                        uuid_utoa (oldinode->gfid), strerror (errno));
+                loc_wipe (&lookup_loc);
+                return NULL;
+        }
+
+        fresh = inode_link (lookup_loc.inode, 0, 0, &stbuf);
+        if (fresh)
+                inode_lookup (fresh);
+
+        loc_wipe (&lookup_loc);
+
+        return fresh;
+}
+
+
+/*
+ Run glfs_refresh_inode_safe() for @inode on @subvol with fs->mutex released.
+
+ Expects fs->mutex to be held on entry: it is unlocked around the refresh
+ and re-locked before returning. fs->migration_in_progress is set while
+ the mutex is dropped, and fs->cond is broadcast once it is cleared again.
+
+ Returns the refreshed inode, or NULL on failure.
+*/
+inode_t *
+__glfs_refresh_inode (struct glfs *fs, xlator_t *subvol, inode_t *inode)
+{
+ inode_t *newinode = NULL;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ newinode = glfs_refresh_inode_safe (subvol, inode);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return newinode;
+}
+
+/*
+  Fill in loc->path and loc->name from the inodes already stored in @loc.
+
+  The path is built from (loc->parent, loc->name) when a parent is present
+  and from loc->inode otherwise. loc->name is then pointed just past the
+  last '/' of the new path (NULL if the path has none).
+
+  Returns 0 on success; on failure returns -1 with errno set to ENOMEM.
+  loc->path is overwritten in both cases.
+*/
+int
+glfs_loc_touchup (loc_t *loc)
+{
+        char *full = NULL;
+        char *slash = NULL;
+        int   rc = -1;
+
+        rc = loc->parent ? inode_path (loc->parent, loc->name, &full)
+                         : inode_path (loc->inode, 0, &full);
+
+        loc->path = full;
+
+        if (rc < 0 || !full) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        slash = strrchr (full, '/');
+        loc->name = slash ? slash + 1 : NULL;
+
+        return 0;
+}
+
+
+/*
+  Read the target of the symlink @inode on @subvol.
+
+  On success the target string is stored in *@lpath and must be released by
+  the caller. When @lpath is NULL the target is released here instead of
+  being leaked.
+
+  Returns the (non-negative) result of syncop_readlink() on success and a
+  negative value on failure.
+*/
+int
+glfs_resolve_symlink (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+                      char **lpath)
+{
+        loc_t  loc = {0, };
+        char  *path = NULL;
+        char  *rpath = NULL;
+        int    ret = -1;
+
+        loc.inode = inode_ref (inode);
+        uuid_copy (loc.gfid, inode->gfid);
+        ret = inode_path (inode, NULL, &rpath);
+        if (ret < 0)
+                goto out;
+        /* loc now owns rpath; loc_wipe() below disposes of it */
+        loc.path = rpath;
+
+        ret = syncop_readlink (subvol, &loc, &path, 4096);
+
+        if (ret < 0)
+                goto out;
+
+        if (lpath)
+                *lpath = path;
+        else
+                /* nobody takes the string; presumably GF-allocated, as the
+                   caller in this file releases it with GF_FREE */
+                GF_FREE (path);
+out:
+        loc_wipe (&loc);
+        return ret;
+}
+
+
+/*
+  Look up @inode itself on @subvol (no parent/name involved) and store its
+  attributes in @iatt.
+
+  Returns the result of syncop_lookup(), or a negative value when the path
+  of @inode cannot be built.
+*/
+int
+glfs_resolve_base (struct glfs *fs, xlator_t *subvol, inode_t *inode,
+                   struct iatt *iatt)
+{
+        loc_t  base = {0, };
+        char  *abspath = NULL;
+        int    rc = -1;
+
+        base.inode = inode_ref (inode);
+        uuid_copy (base.gfid, inode->gfid);
+
+        rc = inode_path (base.inode, NULL, &abspath);
+        base.path = abspath;
+
+        if (rc >= 0)
+                rc = syncop_lookup (subvol, &base, NULL, iatt, NULL, NULL);
+
+        loc_wipe (&base);
+
+        return rc;
+}
+
+
+/*
+ Resolve the single path element @component inside directory @parent.
+
+ "." and ".." are mapped to @parent and its parent; any other name is first
+ searched in the inode table. A cached inode is returned straight away
+ unless @force_lookup is set, in which case (and for uncached names) a
+ lookup is sent to @subvol. If the lookup of a cached inode fails, it is
+ retried once with a fresh inode and a "gfid-req" request attribute.
+
+ On success returns a referenced inode and, when @iatt is non-NULL, copies
+ the attributes into it (only ia_type is filled in for the cached shortcut).
+ Returns NULL on failure.
+*/
+inode_t *
+glfs_resolve_component (struct glfs *fs, xlator_t *subvol, inode_t *parent,
+ const char *component, struct iatt *iatt,
+ int force_lookup)
+{
+ loc_t loc = {0, };
+ inode_t *inode = NULL;
+ int reval = 0;
+ int ret = -1;
+ int glret = -1;
+ struct iatt ciatt = {0, };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+
+ loc.name = component;
+
+ loc.parent = inode_ref (parent);
+ uuid_copy (loc.pargfid, parent->gfid);
+
+
+ if (strcmp (component, ".") == 0)
+ loc.inode = inode_ref (parent);
+ else if (strcmp (component, "..") == 0)
+ loc.inode = inode_parent (parent, 0, 0);
+ else
+ loc.inode = inode_grep (parent->table, parent, component);
+
+ if (loc.inode) {
+ uuid_copy (loc.gfid, loc.inode->gfid);
+ /* a cached inode exists: any lookup below is a revalidation */
+ reval = 1;
+
+ if (!force_lookup) {
+ inode = inode_ref (loc.inode);
+ ciatt.ia_type = inode->ia_type;
+ goto found;
+ }
+ } else {
+ /* NOTE(review): the gfid generated here does not appear to be
+ used; only the retry path below attaches a gfid to a request,
+ and it generates its own -- confirm. */
+ uuid_generate (gfid);
+ loc.inode = inode_new (parent->table);
+ }
+
+ if (!loc.inode)
+ goto out;
+
+ glret = glfs_loc_touchup (&loc);
+ if (glret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup (subvol, &loc, NULL, &ciatt, NULL, NULL);
+ if (ret && reval) {
+ /* revalidation failed: drop the cached inode and look the name
+ up again from scratch */
+ inode_unref (loc.inode);
+ loc.inode = inode_new (parent->table);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ xattr_req = dict_new ();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ uuid_generate (gfid);
+
+ ret = dict_set_static_bin (xattr_req, "gfid-req", gfid, 16);
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_lookup (subvol, &loc, xattr_req, &ciatt,
+ NULL, NULL);
+ }
+ if (ret)
+ goto out;
+
+ inode = inode_link (loc.inode, loc.parent, component, &ciatt);
+found:
+ if (inode)
+ inode_lookup (inode);
+ if (iatt)
+ *iatt = ciatt;
+out:
+ if (xattr_req)
+ dict_unref (xattr_req);
+
+ /* releases the references still held through loc; the returned inode
+ carries its own reference */
+ loc_wipe (&loc);
+
+ return inode;
+}
+
+
+/*
+  Resolve @origpath on @subvol one component at a time, starting at @at for
+  a relative path and at the root of the inode table otherwise.
+
+  Symlinks met before the last component are always followed; a symlink in
+  the last component is followed only when @follow is non-zero. @follow
+  doubles as the recursion depth and is capped by GLFS_SYMLINK_MAX_FOLLOW
+  (errno = ELOOP). @reval forces a lookup on every component; the last
+  component is also looked up whenever @iatt is requested.
+
+  Results:
+    - everything resolved: returns 0, @loc holds parent and inode, and
+      *@iatt (if given) is filled in;
+    - only the last component is missing: returns non-zero, but @loc still
+      carries the parent and the component name;
+    - resolution failed mid-way: returns non-zero, @loc is left untouched and
+      no inode references are kept.
+
+  The caller owns whatever ends up in @loc and must loc_wipe() it.
+*/
+int
+glfs_resolve_at (struct glfs *fs, xlator_t *subvol, inode_t *at,
+                 const char *origpath, loc_t *loc, struct iatt *iatt,
+                 int follow, int reval)
+{
+        inode_t     *inode = NULL;
+        inode_t     *parent = NULL;
+        char        *saveptr = NULL;
+        char        *path = NULL;
+        char        *component = NULL;
+        char        *next_component = NULL;
+        int          ret = -1;
+        struct iatt  ciatt = {0, };
+
+        path = gf_strdup (origpath);
+        if (!path) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        parent = NULL;
+        if (at && path[0] != '/') {
+                /* relative path: start walking from @at */
+                inode = inode_ref (at);
+        } else {
+                /* A relative resolution of a path which starts with '/'
+                   is equal to an absolute path resolution.
+                */
+                inode = inode_ref (subvol->itable->root);
+
+                if (strcmp (path, "/") == 0)
+                        glfs_resolve_base (fs, subvol, inode, &ciatt);
+        }
+
+        for (component = strtok_r (path, "/", &saveptr);
+             component; component = next_component) {
+
+                next_component = strtok_r (NULL, "/", &saveptr);
+
+                if (parent)
+                        inode_unref (parent);
+
+                parent = inode;
+
+                inode = glfs_resolve_component (fs, subvol, parent,
+                                                component, &ciatt,
+                                                /* force hard lookup on the last
+                                                   component, as the caller
+                                                   wants proper iatt filled
+                                                */
+                                                (reval || (!next_component &&
+                                                           iatt)));
+                if (!inode) {
+                        /* a symlink followed in an earlier iteration may
+                           have left ret at 0; this component failed, so
+                           success must not be reported */
+                        ret = -1;
+                        break;
+                }
+
+                if (IA_ISLNK (ciatt.ia_type) && (next_component || follow)) {
+                        /* If the component is not the last piece,
+                           then following it is necessary even if
+                           not requested by the caller
+                        */
+                        char  *lpath = NULL;
+                        loc_t  sym_loc = {0,};
+
+                        if (follow > GLFS_SYMLINK_MAX_FOLLOW) {
+                                errno = ELOOP;
+                                ret = -1;
+                                if (inode) {
+                                        inode_unref (inode);
+                                        inode = NULL;
+                                }
+                                break;
+                        }
+
+                        ret = glfs_resolve_symlink (fs, subvol, inode, &lpath);
+                        inode_unref (inode);
+                        inode = NULL;
+                        if (ret < 0)
+                                break;
+
+                        ret = glfs_resolve_at (fs, subvol, parent, lpath,
+                                               &sym_loc,
+                                               /* followed iatt becomes the
+                                                  component iatt
+                                               */
+                                               &ciatt,
+                                               /* always recursively follow while
+                                                  following symlink
+                                               */
+                                               follow + 1, reval);
+                        if (ret == 0)
+                                inode = inode_ref (sym_loc.inode);
+                        loc_wipe (&sym_loc);
+                        GF_FREE (lpath);
+                }
+
+                if (!next_component)
+                        break;
+
+                if (!IA_ISDIR (ciatt.ia_type)) {
+                        /* next_component exists and this component is
+                           not a directory
+                        */
+                        inode_unref (inode);
+                        inode = NULL;
+                        ret = -1;
+                        errno = ENOTDIR;
+                        break;
+                }
+        }
+
+        if (parent && next_component) {
+                /* resolution failed mid-way: @loc is not filled in, so the
+                   references collected during the walk have no owner and
+                   must be dropped here */
+                if (inode)
+                        inode_unref (inode);
+                inode_unref (parent);
+                goto out;
+        }
+
+        /* At this point, all components up to the last parent directory
+           have been resolved successfully (@parent). Resolution of basename
+           might have failed (@inode) if at all.
+        */
+
+        loc->parent = parent;
+        if (parent) {
+                uuid_copy (loc->pargfid, parent->gfid);
+                /* points into @path for now; glfs_loc_touchup() below
+                   re-derives the name from the rebuilt loc->path */
+                loc->name = component;
+        }
+
+        loc->inode = inode;
+        if (inode) {
+                uuid_copy (loc->gfid, inode->gfid);
+                if (iatt)
+                        *iatt = ciatt;
+                ret = 0;
+        }
+
+        glfs_loc_touchup (loc);
+out:
+        GF_FREE (path);
+
+        /* do NOT loc_wipe here as only last component might be missing */
+
+        return ret;
+}
+
+
+/*
+  Resolve @origpath: absolute paths are resolved from the root, anything
+  else relative to the current working directory of @fs.
+
+  Returns the result of glfs_resolve_at().
+*/
+int
+glfs_resolve_path (struct glfs *fs, xlator_t *subvol, const char *origpath,
+                   loc_t *loc, struct iatt *iatt, int follow, int reval)
+{
+        inode_t *start = NULL;
+        int      rc = -1;
+
+        if (origpath[0] != '/') {
+                start = glfs_cwd_get (fs);
+
+                rc = glfs_resolve_at (fs, subvol, start, origpath, loc, iatt,
+                                      follow, reval);
+
+                /* drop the cwd reference obtained above */
+                if (start)
+                        inode_unref (start);
+
+                return rc;
+        }
+
+        return glfs_resolve_at (fs, subvol, NULL, origpath, loc, iatt,
+                                follow, reval);
+}
+
+
+/* Resolve @origpath, following a symlink in the last component
+   (glfs_resolve_path() with follow = 1). */
+int
+glfs_resolve (struct glfs *fs, xlator_t *subvol, const char *origpath,
+              loc_t *loc, struct iatt *iatt, int reval)
+{
+        return glfs_resolve_path (fs, subvol, origpath, loc, iatt, 1, reval);
+}
+
+
+/* Resolve @origpath without following a symlink in the last component
+   (glfs_resolve_path() with follow = 0). */
+int
+glfs_lresolve (struct glfs *fs, xlator_t *subvol, const char *origpath,
+               loc_t *loc, struct iatt *iatt, int reval)
+{
+        return glfs_resolve_path (fs, subvol, origpath, loc, iatt, 0, reval);
+}
+
+
+/*
+  Carry the locks held through @oldfd (on @oldsubvol) over to @newfd (on
+  @newsubvol).
+
+  Nothing is done when @oldfd has no lock context or it is empty. Otherwise
+  @newfd is given a reference to the lock context of @oldfd, the lock info
+  is read from the old graph with fgetxattr and written to the new one with
+  fsetxattr.
+
+  Returns 0 on success (or when there is nothing to migrate) and a negative
+  value on failure, including the case where the old graph returns no lock
+  info at all.
+*/
+int
+glfs_migrate_fd_locks_safe (struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd,
+                            xlator_t *newsubvol, fd_t *newfd)
+{
+        dict_t *lockinfo = NULL;
+        int     ret = 0;
+        char    uuid1[64];
+
+        if (!oldfd->lk_ctx || fd_lk_ctx_empty (oldfd->lk_ctx))
+                return 0;
+
+        newfd->lk_ctx = fd_lk_ctx_ref (oldfd->lk_ctx);
+
+        ret = syncop_fgetxattr (oldsubvol, oldfd, &lockinfo,
+                                GF_XATTR_LOCKINFO_KEY);
+        if (ret < 0) {
+                gf_log (fs->volname, GF_LOG_WARNING,
+                        "fgetxattr (%s) failed (%s) on graph %s (%d)",
+                        uuid_utoa_r (oldfd->inode->gfid, uuid1),
+                        strerror (errno),
+                        graphid_str (oldsubvol), oldsubvol->graph->id);
+                goto out;
+        }
+
+        if (!dict_get (lockinfo, GF_XATTR_LOCKINFO_KEY)) {
+                gf_log (fs->volname, GF_LOG_WARNING,
+                        "missing lokinfo key (%s) on graph %s (%d)",
+                        uuid_utoa_r (oldfd->inode->gfid, uuid1),
+                        graphid_str (oldsubvol), oldsubvol->graph->id);
+                /* locks exist but cannot be transferred: this must not be
+                   reported as success (ret is >= 0 at this point) */
+                ret = -1;
+                goto out;
+        }
+
+        ret = syncop_fsetxattr (newsubvol, newfd, lockinfo, 0);
+        if (ret < 0) {
+                gf_log (fs->volname, GF_LOG_WARNING,
+                        "fsetxattr (%s) failed (%s) on graph %s (%d)",
+                        uuid_utoa_r (newfd->inode->gfid, uuid1),
+                        strerror (errno),
+                        graphid_str (newsubvol), newsubvol->graph->id);
+                goto out;
+        }
+out:
+        if (lockinfo)
+                dict_unref (lockinfo);
+        return ret;
+}
+
+
+/*
+  Produce an fd on @newsubvol that is equivalent to @oldfd.
+
+  If @oldfd already lives on @newsubvol a new reference to it is returned.
+  Otherwise: the old fd is fsync'ed (unless its graph is already marked as
+  switched), its inode is refreshed on the new graph, a new fd is created
+  and opened there (opendir for directories; O_TRUNC, O_EXCL and O_CREAT
+  are masked out for files), locks are migrated, and the new fd is bound.
+
+  Returns a referenced fd, or NULL on failure.
+*/
+fd_t *
+glfs_migrate_fd_safe (struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
+{
+        fd_t     *newfd = NULL;
+        inode_t  *oldinode = NULL;
+        inode_t  *newinode = NULL;
+        xlator_t *oldsubvol = NULL;
+        int       ret = -1;
+        loc_t     loc = {0, };
+        char      uuid1[64];
+
+
+        oldinode = oldfd->inode;
+        oldsubvol = oldinode->table->xl;
+
+        if (oldsubvol == newsubvol)
+                return fd_ref (oldfd);
+
+        if (!oldsubvol->switched) {
+                /* a failed fsync is only logged; migration goes on */
+                ret = syncop_fsync (oldsubvol, oldfd, 0);
+                if (ret) {
+                        gf_log (fs->volname, GF_LOG_WARNING,
+                                "fsync() failed (%s) on %s graph %s (%d)",
+                                strerror (errno),
+                                uuid_utoa_r (oldfd->inode->gfid, uuid1),
+                                graphid_str (oldsubvol), oldsubvol->graph->id);
+                }
+        }
+
+        newinode = glfs_refresh_inode_safe (newsubvol, oldinode);
+        if (!newinode) {
+                gf_log (fs->volname, GF_LOG_WARNING,
+                        "inode (%s) refresh failed (%s) on graph %s (%d)",
+                        uuid_utoa_r (oldinode->gfid, uuid1),
+                        strerror (errno),
+                        graphid_str (newsubvol), newsubvol->graph->id);
+                goto out;
+        }
+
+        newfd = fd_create (newinode, getpid());
+        if (!newfd) {
+                gf_log (fs->volname, GF_LOG_WARNING,
+                        "fd_create (%s) failed (%s) on graph %s (%d)",
+                        uuid_utoa_r (newinode->gfid, uuid1),
+                        strerror (errno),
+                        graphid_str (newsubvol), newsubvol->graph->id);
+                goto out;
+        }
+
+        loc.inode = inode_ref (newinode);
+
+        ret = inode_path (oldfd->inode, NULL, (char **)&loc.path);
+        if (ret < 0) {
+                gf_log (fs->volname, GF_LOG_INFO, "inode_path failed");
+                goto out;
+        }
+
+        uuid_copy (loc.gfid, oldinode->gfid);
+
+
+        if (IA_ISDIR (oldinode->ia_type))
+                ret = syncop_opendir (newsubvol, &loc, newfd);
+        else
+                ret = syncop_open (newsubvol, &loc,
+                                   oldfd->flags & ~(O_TRUNC|O_EXCL|O_CREAT),
+                                   newfd);
+        loc_wipe (&loc);
+
+        if (ret) {
+                gf_log (fs->volname, GF_LOG_WARNING,
+                        "syncop_open%s (%s) failed (%s) on graph %s (%d)",
+                        IA_ISDIR (oldinode->ia_type) ? "dir" : "",
+                        uuid_utoa_r (newinode->gfid, uuid1),
+                        strerror (errno),
+                        graphid_str (newsubvol), newsubvol->graph->id);
+                goto out;
+        }
+
+        ret = glfs_migrate_fd_locks_safe (fs, oldsubvol, oldfd, newsubvol,
+                                          newfd);
+
+        if (ret) {
+                gf_log (fs->volname, GF_LOG_WARNING,
+                        "lock migration (%s) failed (%s) on graph %s (%d)",
+                        uuid_utoa_r (newinode->gfid, uuid1),
+                        strerror (errno),
+                        graphid_str (newsubvol), newsubvol->graph->id);
+                goto out;
+        }
+
+        newfd->flags = oldfd->flags;
+        fd_bind (newfd);
+out:
+        if (newinode)
+                inode_unref (newinode);
+
+        /* the early failure paths arrive here before any fd exists, so
+           only unref one that was actually created */
+        if (ret && newfd) {
+                fd_unref (newfd);
+                newfd = NULL;
+        }
+
+        return newfd;
+}
+
+
+/*
+ Run glfs_migrate_fd_safe() for the fd behind @glfd with fs->mutex released.
+
+ Expects fs->mutex to be held on entry: it is unlocked around the migration
+ and re-locked before returning. fs->migration_in_progress is set while
+ the mutex is dropped, and fs->cond is broadcast once it is cleared again.
+
+ Returns the migrated fd, or NULL on failure. glfd->fd itself is not
+ modified here.
+*/
+fd_t *
+__glfs_migrate_fd (struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd)
+{
+ fd_t *oldfd = NULL;
+ fd_t *newfd = NULL;
+
+ oldfd = glfd->fd;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock (&fs->mutex);
+ {
+ newfd = glfs_migrate_fd_safe (fs, newsubvol, oldfd);
+ }
+ pthread_mutex_lock (&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast (&fs->cond);
+
+ return newfd;
+}
+
+
+/*
+ Return a referenced fd for @glfd that is valid on @subvol.
+
+ If glfd->fd already belongs to @subvol a new reference to it is returned.
+ Otherwise the fd is migrated; when @subvol is the active subvolume the
+ migrated fd also replaces glfd->fd (which keeps its own reference).
+
+ Goes through __glfs_migrate_fd(), which expects fs->mutex to be held.
+ Returns NULL if migration fails.
+*/
+fd_t *
+__glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ if (glfd->fd->inode->table->xl == subvol)
+ return fd_ref (glfd->fd);
+
+ fd = __glfs_migrate_fd (fs, subvol, glfd);
+ if (!fd)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ fd_unref (glfd->fd);
+ /* one reference stays with glfd, the other goes to the caller */
+ glfd->fd = fd_ref (fd);
+ }
+
+ return fd;
+}
+
+
+/* Locked wrapper around __glfs_resolve_fd(): takes the fs lock for the
+   duration of the call and returns its result. */
+fd_t *
+glfs_resolve_fd (struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+        fd_t *resolved = NULL;
+
+        glfs_lock (fs);
+        resolved = __glfs_resolve_fd (fs, subvol, glfd);
+        glfs_unlock (fs);
+
+        return resolved;
+}
+
+
+/*
+ Migrate every fd on fs->openfds to @subvol.
+
+ Entries whose inode has a null gfid are skipped. For the others, a
+ successfully migrated fd replaces glfd->fd; on failure the old fd is kept.
+
+ NOTE(review): __glfs_migrate_fd() releases fs->mutex while it works, so
+ the list is walked across unlocked windows -- confirm nothing can modify
+ fs->openfds concurrently.
+*/
+void
+__glfs_migrate_openfds (struct glfs *fs, xlator_t *subvol)
+{
+ struct glfs_fd *glfd = NULL;
+ fd_t *fd = NULL;
+
+ list_for_each_entry (glfd, &fs->openfds, openfds) {
+ if (uuid_is_null (glfd->fd->inode->gfid)) {
+ gf_log (fs->volname, GF_LOG_INFO,
+ "skipping openfd %p/%p in graph %s (%d)",
+ glfd, glfd->fd, graphid_str(subvol),
+ subvol->graph->id);
+ /* create in progress, defer */
+ continue;
+ }
+
+ fd = __glfs_migrate_fd (fs, subvol, glfd);
+ if (fd) {
+ /* the reference returned by the migration now belongs to glfd */
+ fd_unref (glfd->fd);
+ glfd->fd = fd;
+ }
+ }
+}
+
+
+/*
+ Return the subvolume operations should use, switching to fs->next_subvol
+ first if one is pending.
+
+ A switch consists of: first lookup on the new graph, refresh of fs->cwd
+ on it, migration of all open fds, and finally the swap of
+ active_subvol/next_subvol (the previous active one is parked in
+ fs->old_subvol) together with the cwd.
+
+ The helpers used here drop and re-take fs->mutex, so the mutex is
+ expected to be held on entry. Returns NULL if the first lookup or the
+ cwd refresh fails; in that case nothing is switched.
+*/
+xlator_t *
+__glfs_active_subvol (struct glfs *fs)
+{
+ xlator_t *new_subvol = NULL;
+ int ret = -1;
+ inode_t *new_cwd = NULL;
+
+ if (!fs->next_subvol)
+ return fs->active_subvol;
+
+ new_subvol = fs->next_subvol;
+
+ ret = __glfs_first_lookup (fs, new_subvol);
+ if (ret) {
+ gf_log (fs->volname, GF_LOG_INFO,
+ "first lookup on graph %s (%d) failed (%s)",
+ graphid_str (new_subvol), new_subvol->graph->id,
+ strerror (errno));
+ return NULL;
+ }
+
+ if (fs->cwd) {
+ new_cwd = __glfs_refresh_inode (fs, new_subvol, fs->cwd);
+
+ if (!new_cwd) {
+ char buf1[64];
+ gf_log (fs->volname, GF_LOG_INFO,
+ "cwd refresh of %s graph %s (%d) failed (%s)",
+ uuid_utoa_r (fs->cwd->gfid, buf1),
+ graphid_str (new_subvol),
+ new_subvol->graph->id, strerror (errno));
+ return NULL;
+ }
+ }
+
+ __glfs_migrate_openfds (fs, new_subvol);
+
+ /* switching @active_subvol and @cwd
+ should be atomic
+ */
+ fs->old_subvol = fs->active_subvol;
+ fs->active_subvol = fs->next_subvol;
+ fs->next_subvol = NULL;
+
+ if (new_cwd) {
+ __glfs_cwd_set (fs, new_cwd);
+ /* __glfs_cwd_set() took its own reference */
+ inode_unref (new_cwd);
+ }
+
+ gf_log (fs->volname, GF_LOG_INFO, "switched to graph %s (%d)",
+ graphid_str (new_subvol), new_subvol->graph->id);
+
+ return new_subvol;
+}
+
+/*
+ Locked entry point for obtaining the subvolume to operate on.
+
+ On success the returned subvolume has its winds counter incremented; the
+ matching decrement is done by glfs_subvol_done(). If a graph switch left a
+ previous subvolume in fs->old_subvol, it is marked as switched and handed
+ to glfs_subvol_done() after the lock is released.
+
+ Returns NULL when no subvolume could be made active.
+*/
+xlator_t *
+glfs_active_subvol (struct glfs *fs)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *old_subvol = NULL;
+
+ glfs_lock (fs);
+ {
+ subvol = __glfs_active_subvol (fs);
+
+ if (subvol)
+ subvol->winds++;
+
+ if (fs->old_subvol) {
+ old_subvol = fs->old_subvol;
+ fs->old_subvol = NULL;
+ old_subvol->switched = 1;
+ }
+ }
+ glfs_unlock (fs);
+
+ /* done outside the lock: glfs_subvol_done() takes it itself */
+ if (old_subvol)
+ glfs_subvol_done (fs, old_subvol);
+
+ return subvol;
+}
+
+
+/*
+ Drop one winds count from @subvol (no-op for NULL).
+
+ When the count reaches zero the subvolume is asserted not to be the
+ active one and is sent GF_EVENT_PARENT_DOWN. The counter is updated under
+ the fs lock; the notification is sent after the lock is released.
+*/
+void
+glfs_subvol_done (struct glfs *fs, xlator_t *subvol)
+{
+ int ref = 0;
+ xlator_t *active_subvol = NULL;
+
+ if (!subvol)
+ return;
+
+ glfs_lock (fs);
+ {
+ ref = (--subvol->winds);
+ /* sampled under the lock for the assertion below */
+ active_subvol = fs->active_subvol;
+ }
+ glfs_unlock (fs);
+
+ if (ref == 0) {
+ assert (subvol != active_subvol);
+ xlator_notify (subvol, GF_EVENT_PARENT_DOWN, subvol, NULL);
+ }
+}
+
+
+/*
+ Make @inode the current working directory of @fs.
+
+ An inode from another graph is first refreshed onto fs->active_subvol;
+ otherwise a new reference is taken. Either way fs->cwd ends up holding
+ its own reference and the previous cwd is unreferenced. The caller keeps
+ its reference to @inode.
+
+ May go through __glfs_refresh_inode(), which expects fs->mutex to be held.
+ Returns 0 on success, -1 if the refresh fails (fs->cwd is then unchanged).
+*/
+int
+__glfs_cwd_set (struct glfs *fs, inode_t *inode)
+{
+ if (inode->table->xl != fs->active_subvol) {
+ inode = __glfs_refresh_inode (fs, fs->active_subvol, inode);
+ if (!inode)
+ return -1;
+ } else {
+ inode_ref (inode);
+ }
+
+ if (fs->cwd)
+ inode_unref (fs->cwd);
+
+ fs->cwd = inode;
+
+ return 0;
+}
+
+
+/* Locked wrapper around __glfs_cwd_set(): takes the fs lock for the
+   duration of the call and returns its result. */
+int
+glfs_cwd_set (struct glfs *fs, inode_t *inode)
+{
+        int rc = 0;
+
+        glfs_lock (fs);
+        rc = __glfs_cwd_set (fs, inode);
+        glfs_unlock (fs);
+
+        return rc;
+}
+
+
+/*
+  Return a referenced inode for the current working directory, valid on
+  fs->active_subvol, or NULL when no cwd is set or it cannot be refreshed
+  onto the active graph. fs->cwd itself is not modified.
+*/
+inode_t *
+__glfs_cwd_get (struct glfs *fs)
+{
+        if (!fs->cwd)
+                return NULL;
+
+        /* cwd still belongs to an older graph: refresh it on the fly */
+        if (fs->cwd->table->xl != fs->active_subvol)
+                return __glfs_refresh_inode (fs, fs->active_subvol, fs->cwd);
+
+        return inode_ref (fs->cwd);
+}
+
+/* Locked wrapper around __glfs_cwd_get(): takes the fs lock for the
+   duration of the call and returns its result. */
+inode_t *
+glfs_cwd_get (struct glfs *fs)
+{
+        inode_t *dir = NULL;
+
+        glfs_lock (fs);
+        dir = __glfs_cwd_get (fs);
+        glfs_unlock (fs);
+
+        return dir;
+}
+
+/*
+ Return a referenced inode for @object that is valid on @subvol.
+
+ If object->inode already belongs to @subvol a new reference to it is
+ returned. Otherwise the inode is refreshed; when @subvol is the active
+ subvolume the refreshed inode also replaces object->inode (which keeps
+ its own reference).
+
+ NOTE(review): the refresh targets fs->active_subvol rather than @subvol,
+ so the result only matches @subvol when callers pass the active
+ subvolume -- confirm this is intended.
+
+ Goes through __glfs_refresh_inode(), which expects fs->mutex to be held.
+ Returns NULL if the refresh fails.
+*/
+inode_t *
+__glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+
+ if (object->inode->table->xl == subvol)
+ return inode_ref (object->inode);
+
+ inode = __glfs_refresh_inode (fs, fs->active_subvol,
+ object->inode);
+ if (!inode)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ inode_unref (object->inode);
+ /* one reference stays with the object, the other goes to the caller */
+ object->inode = inode_ref (inode);
+ }
+
+ return inode;
+}
+
+/* Locked wrapper around __glfs_resolve_inode(): takes the fs lock for the
+   duration of the call and returns its result. */
+inode_t *
+glfs_resolve_inode (struct glfs *fs, xlator_t *subvol,
+                    struct glfs_object *object)
+{
+        inode_t *resolved = NULL;
+
+        glfs_lock (fs);
+        resolved = __glfs_resolve_inode(fs, subvol, object);
+        glfs_unlock (fs);
+
+        return resolved;
+}
+
+/*
+  Allocate a glfs_object for the inode held in @loc and return it through
+  @retobject.
+
+  The inode reference is moved from @loc to the new object (loc->inode is
+  set to NULL), and the object's gfid is copied from that inode.
+
+  Returns 0 on success; on allocation failure returns -1 with errno set to
+  ENOMEM and leaves @loc untouched.
+*/
+int
+glfs_create_object (loc_t *loc, struct glfs_object **retobject)
+{
+        struct glfs_object *obj = NULL;
+
+        obj = GF_CALLOC (1, sizeof(struct glfs_object),
+                         glfs_mt_glfs_object_t);
+        if (!obj) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        /* take over the reference held by @loc */
+        obj->inode = loc->inode;
+        loc->inode = NULL;
+
+        uuid_copy (obj->gfid, obj->inode->gfid);
+
+        *retobject = obj;
+
+        return 0;
+}
diff --git a/api/src/glfs.c b/api/src/glfs.c
new file mode 100644
index 000000000..29ed47c0c
--- /dev/null
+++ b/api/src/glfs.c
@@ -0,0 +1,673 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+/*
+ TODO:
+ - merge locks in glfs_posix_lock for lock self-healing
+ - set proper pid/lk_owner to call frames (currently buried in syncop)
+ - fix logging.c/h to store logfp and loglevel in glusterfs_ctx_t and
+ reach it via THIS.
+ - update syncop functions to accept/return xdata. ???
+ - protocol/client to reconnect immediately after portmap disconnect.
+ - handle SEEK_END failure in _lseek()
+ - handle umask (per filesystem?)
+ - make itables LRU based
+ - 0-copy for readv/writev
+ - reconcile the open/creat mess
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "logging.h"
+#include "stack.h"
+#include "event.h"
+#include "glfs-mem-types.h"
+#include "common-utils.h"
+#include "syncop.h"
+#include "call-stub.h"
+
+#include "glfs.h"
+#include "glfs-internal.h"
+#include "hashfn.h"
+#include "rpc-clnt.h"
+
+
+/*
+ * vol_assigned: true once either a volfile path or a volfile server has
+ * been recorded in @args.
+ */
+static gf_boolean_t
+vol_assigned (cmd_args_t *args)
+{
+        return (args->volfile != NULL) || (args->volfile_server != NULL);
+}
+
+
+/*
+ * glusterfs_ctx_defaults_init: populate a fresh glusterfs_ctx_t with the
+ * pools and defaults used by this library.
+ *
+ * Sets up, in order: memory accounting for THIS, the process uuid, page
+ * size, iobuf pool, event pool, syncenv, the call pool with its frame and
+ * stack mem-pools, the stub mem-pool and the three dict mem-pools, then
+ * initialises the list heads and locks.
+ *
+ * Returns 0 on success and -1 as soon as any step fails.
+ *
+ * NOTE(review): the error path below only tears down the call pool and the
+ * stub/dict mem-pools; process_uuid, iobuf_pool, event_pool and env are not
+ * released here.
+ */
+static int
+glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+{
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ /* return value is not checked */
+ xlator_mem_acct_init (THIS, glfs_mt_end + 1);
+
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
+ if (!ctx->process_uuid) {
+ goto err;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new ();
+ if (!ctx->iobuf_pool) {
+ goto err;
+ }
+
+ ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
+ if (!ctx->event_pool) {
+ goto err;
+ }
+
+ ctx->env = syncenv_new (0, 0, 0);
+ if (!ctx->env) {
+ goto err;
+ }
+
+ /* pool is only published in ctx->pool once everything succeeded */
+ pool = GF_CALLOC (1, sizeof (call_pool_t),
+ glfs_mt_call_pool_t);
+ if (!pool) {
+ goto err;
+ }
+
+ /* frame_mem_pool size 112 * 4k */
+ pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
+ if (!pool->frame_mem_pool) {
+ goto err;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
+ if (!pool->stack_mem_pool) {
+ goto err;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ goto err;
+ }
+
+ ctx->dict_pool = mem_pool_new (dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto err;
+
+ ctx->dict_pair_pool = mem_pool_new (data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto err;
+
+ ctx->dict_data_pool = mem_pool_new (data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto err;
+
+ INIT_LIST_HEAD (&pool->all_frames);
+ INIT_LIST_HEAD (&ctx->cmd_args.xlator_options);
+ LOCK_INIT (&pool->lock);
+ ctx->pool = pool;
+
+ pthread_mutex_init (&(ctx->lock), NULL);
+
+ ret = 0;
+err:
+ /* reached on success too; both cleanup blocks are skipped when ret == 0 */
+ if (ret && pool) {
+ if (pool->frame_mem_pool)
+ mem_pool_destroy (pool->frame_mem_pool);
+ if (pool->stack_mem_pool)
+ mem_pool_destroy (pool->stack_mem_pool);
+ GF_FREE (pool);
+ }
+
+ /* NOTE(review): ctx has already been dereferenced above, so the
+    "&& ctx" test cannot be what protects against a NULL ctx */
+ if (ret && ctx) {
+ if (ctx->stub_mem_pool)
+ mem_pool_destroy (ctx->stub_mem_pool);
+ if (ctx->dict_pool)
+ mem_pool_destroy (ctx->dict_pool);
+ if (ctx->dict_data_pool)
+ mem_pool_destroy (ctx->dict_data_pool);
+ if (ctx->dict_pair_pool)
+ mem_pool_destroy (ctx->dict_pair_pool);
+ }
+
+ return ret;
+}
+
+
+/*
+ * create_master: build and initialise the "gfapi" master xlator for @fs.
+ *
+ * The xlator is named "gfapi", typed "mount/api", pointed at fs->ctx with
+ * @fs as its private data and given an empty options dict before
+ * xlator_init() runs. On success it is published as fs->ctx->master and
+ * becomes THIS.
+ *
+ * Returns 0 on success. On any failure the partially built xlator is
+ * passed to xlator_destroy() and -1 is returned.
+ */
+static int
+create_master (struct glfs *fs)
+{
+        xlator_t *xl = NULL;
+
+        xl = GF_CALLOC (1, sizeof (*xl), glfs_mt_xlator_t);
+        if (xl == NULL)
+                return -1;
+
+        xl->name = gf_strdup ("gfapi");
+        if (xl->name == NULL)
+                goto fail;
+
+        if (xlator_set_type (xl, "mount/api") == -1) {
+                gf_log ("glfs", GF_LOG_ERROR,
+                        "master xlator for %s initialization failed",
+                        fs->volname);
+                goto fail;
+        }
+
+        xl->ctx     = fs->ctx;
+        xl->private = fs;
+        xl->options = get_new_dict ();
+        if (xl->options == NULL)
+                goto fail;
+
+        if (xlator_init (xl)) {
+                gf_log ("glfs", GF_LOG_ERROR,
+                        "failed to initialize gfapi translator");
+                goto fail;
+        }
+
+        fs->ctx->master = xl;
+        THIS = xl;
+
+        return 0;
+
+fail:
+        xlator_destroy (xl);
+
+        return -1;
+}
+
+
+/*
+ * get_volfp: open the volume specification file named in the ctx's
+ * cmd_args for reading.
+ *
+ * The path is lstat()ed first; if that or the fopen() fails the error is
+ * logged together with strerror(errno) and NULL is returned. On success
+ * the open stream is returned.
+ */
+static FILE *
+get_volfp (struct glfs *fs)
+{
+        cmd_args_t  *args = &fs->ctx->cmd_args;
+        FILE        *fp   = NULL;
+        struct stat  st;
+
+        if (lstat (args->volfile, &st) == -1) {
+                gf_log ("glfs", GF_LOG_ERROR,
+                        "%s: %s", args->volfile, strerror (errno));
+                return NULL;
+        }
+
+        fp = fopen (args->volfile, "r");
+        if (fp == NULL) {
+                gf_log ("glfs", GF_LOG_ERROR,
+                        "volume file %s: %s",
+                        args->volfile,
+                        strerror (errno));
+                return NULL;
+        }
+
+        gf_log ("glfs", GF_LOG_DEBUG,
+                "loading volume file %s", args->volfile);
+
+        return fp;
+}
+
+
+/*
+ * glfs_volumes_init: start loading the volume graph for @fs.
+ *
+ * Fails with -1 when neither a volfile nor a volfile server was configured.
+ * With a volfile server the work is delegated to glfs_mgmt_init();
+ * otherwise the local volfile is opened and handed to glfs_process_volfp().
+ * The delegate's return value is passed through.
+ */
+int
+glfs_volumes_init (struct glfs *fs)
+{
+        cmd_args_t *args  = &fs->ctx->cmd_args;
+        FILE       *volfp = NULL;
+
+        if (!vol_assigned (args))
+                return -1;
+
+        if (args->volfile_server)
+                return glfs_mgmt_init (fs);
+
+        volfp = get_volfp (fs);
+        if (volfp == NULL) {
+                gf_log ("glfs", GF_LOG_ERROR,
+                        "Cannot reach volume specification file");
+                return -1;
+        }
+
+        return glfs_process_volfp (fs, volfp);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+
+
+/*
+ * glfs_set_xlator_option: queue a (xlator, key, value) override on the
+ * ctx's cmd_args.xlator_options list.
+ *
+ * All three strings are duplicated. Returns 0 once the entry is on the
+ * list. If any allocation fails, errno is set to ENOMEM, whatever was
+ * allocated is released and -1 is returned.
+ */
+int
+glfs_set_xlator_option (struct glfs *fs, const char *xlator, const char *key,
+                        const char *value)
+{
+        xlator_cmdline_option_t *opt = NULL;
+
+        opt = GF_CALLOC (1, sizeof (*opt), glfs_mt_xlator_cmdline_option_t);
+        if (opt == NULL) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        INIT_LIST_HEAD (&opt->cmd_args);
+
+        /* each copy is attempted only if the previous one succeeded;
+           untouched fields stay NULL from the zeroing allocation */
+        opt->volume = gf_strdup (xlator);
+        if (opt->volume != NULL)
+                opt->key = gf_strdup (key);
+        if (opt->key != NULL)
+                opt->value = gf_strdup (value);
+
+        if (opt->value != NULL) {
+                list_add (&opt->cmd_args, &fs->ctx->cmd_args.xlator_options);
+                return 0;
+        }
+
+        errno = ENOMEM;
+
+        GF_FREE (opt->volume);
+        GF_FREE (opt->key);
+        GF_FREE (opt->value);
+        GF_FREE (opt);
+
+        return -1;
+}
+
+/* glfs_setfsuid: forward @fsuid to syncopctx_setfsuid() and return its result. */
+int
+glfs_setfsuid (uid_t fsuid)
+{
+        return syncopctx_setfsuid (&fsuid);
+}
+
+/* glfs_setfsgid: forward @fsgid to syncopctx_setfsgid() and return its result. */
+int
+glfs_setfsgid (gid_t fsgid)
+{
+        return syncopctx_setfsgid (&fsgid);
+}
+
+/*
+ * glfs_setfsgroups: forward the @size-entry group @list to
+ * syncopctx_setfsgroups() and return its result.
+ */
+int
+glfs_setfsgroups (size_t size, const gid_t *list)
+{
+        return syncopctx_setfsgroups (size, list);
+}
+
+/* glfs_from_glfd: return the glfs object recorded in @glfd->fs. */
+struct glfs *
+glfs_from_glfd (struct glfs_fd *glfd)
+{
+        struct glfs *owner = glfd->fs;
+
+        return owner;
+}
+
+
+/*
+ * glfs_fd_new: allocate a zeroed glfs_fd tied to @fs.
+ *
+ * The openfds list node is initialised but the descriptor is not linked
+ * into fs->openfds here. Returns NULL if the allocation fails.
+ */
+struct glfs_fd *
+glfs_fd_new (struct glfs *fs)
+{
+        struct glfs_fd *handle = NULL;
+
+        handle = GF_CALLOC (1, sizeof (*handle), glfs_mt_glfs_fd_t);
+        if (handle != NULL) {
+                handle->fs = fs;
+                INIT_LIST_HEAD (&handle->openfds);
+        }
+
+        return handle;
+}
+
+
+/*
+ * glfs_fd_bind: append @glfd to its filesystem's openfds list while
+ * holding glfs_lock().
+ */
+void
+glfs_fd_bind (struct glfs_fd *glfd)
+{
+        struct glfs *owner = glfd->fs;
+
+        glfs_lock (owner);
+        list_add_tail (&glfd->openfds, &owner->openfds);
+        glfs_unlock (owner);
+}
+
+/*
+ * glfs_fd_destroy: unlink and free a glfs_fd. A NULL @glfd is a no-op.
+ *
+ * The descriptor is removed from the openfds list under glfs_lock(); the
+ * underlying fd (if any) is unref'd, and the readdir buffer and the
+ * descriptor itself are freed.
+ */
+void
+glfs_fd_destroy (struct glfs_fd *glfd)
+{
+        struct glfs *owner = NULL;
+
+        if (glfd == NULL)
+                return;
+
+        owner = glfd->fs;
+
+        glfs_lock (owner);
+        list_del_init (&glfd->openfds);
+        glfs_unlock (owner);
+
+        if (glfd->fd != NULL)
+                fd_unref (glfd->fd);
+
+        GF_FREE (glfd->readdirbuf);
+        GF_FREE (glfd);
+}
+
+
+/*
+ * glfs_poller: thread entry point (started from glfs_init_common) that runs
+ * event_dispatch() on the ctx's event pool. @data is the struct glfs.
+ * Always returns NULL.
+ */
+static void *
+glfs_poller (void *data)
+{
+        struct glfs *fs = data;
+
+        event_dispatch (fs->ctx->event_pool);
+
+        return NULL;
+}
+
+
+/*
+ * glfs_new: create a new 'virtual mount' object for volume @volname.
+ *
+ * Creates and initialises a glusterfs ctx, allocates the glfs object,
+ * points logging at /dev/null, and records @volname both as the ctx's
+ * volfile_id and as fs->volname.
+ *
+ * Returns the new object, or NULL on failure:
+ *   - errno = EINVAL when @volname is NULL. fs->volname is later passed to
+ *     strlen() in glfs_init_common(), so a NULL name can never produce a
+ *     usable mount.
+ *   - errno = ENOMEM when duplicating @volname fails; the glfs object and
+ *     any copy already made are released.
+ *   - NULL is also returned if ctx creation or initialisation fails.
+ *
+ * NOTE(review): the ctx itself is not released on any failure path; no
+ * ctx teardown helper is visible from this file.
+ */
+struct glfs *
+glfs_new (const char *volname)
+{
+        struct glfs     *fs  = NULL;
+        int              ret = -1;
+        glusterfs_ctx_t *ctx = NULL;
+
+        if (!volname) {
+                errno = EINVAL;
+                return NULL;
+        }
+
+        ctx = glusterfs_ctx_new ();
+        if (!ctx) {
+                return NULL;
+        }
+
+#ifdef DEBUG
+        gf_mem_acct_enable_set (ctx);
+#endif
+
+        /* first globals init, for gf_mem_acct_enable_set () */
+        ret = glusterfs_globals_init (ctx);
+        if (ret)
+                return NULL;
+
+        THIS->ctx = ctx;
+
+        /* then ctx_defaults_init, for xlator_mem_acct_init(THIS) */
+        ret = glusterfs_ctx_defaults_init (ctx);
+        if (ret)
+                return NULL;
+
+        fs = GF_CALLOC (1, sizeof (*fs), glfs_mt_glfs_t);
+        if (!fs)
+                return NULL;
+        fs->ctx = ctx;
+
+        /* a logging failure is deliberately not fatal */
+        glfs_set_logging (fs, "/dev/null", 0);
+
+        fs->ctx->cmd_args.volfile_id = gf_strdup (volname);
+        fs->volname = gf_strdup (volname);
+        if (!fs->ctx->cmd_args.volfile_id || !fs->volname)
+                goto enomem;
+
+        pthread_mutex_init (&fs->mutex, NULL);
+        pthread_cond_init (&fs->cond, NULL);
+
+        INIT_LIST_HEAD (&fs->openfds);
+
+        return fs;
+
+enomem:
+        /* the ctx outlives fs here, so do not leave a dangling volfile_id */
+        GF_FREE (fs->ctx->cmd_args.volfile_id);
+        fs->ctx->cmd_args.volfile_id = NULL;
+        GF_FREE (fs->volname);
+        GF_FREE (fs);
+
+        errno = ENOMEM;
+
+        return NULL;
+}
+
+
+/*
+ * glfs_set_volfile: record the path of a local volume specification file.
+ *
+ * Returns 0 on success. Returns -1 with errno set to:
+ *   EINVAL - @volfile is NULL, or a volfile / volfile server has already
+ *            been configured on @fs;
+ *   ENOMEM - the path could not be duplicated.
+ * cmd_args is left untouched on failure.
+ */
+int
+glfs_set_volfile (struct glfs *fs, const char *volfile)
+{
+        cmd_args_t *cmd_args = NULL;
+        char       *path     = NULL;
+
+        if (!volfile) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        cmd_args = &fs->ctx->cmd_args;
+
+        if (vol_assigned (cmd_args)) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        path = gf_strdup (volfile);
+        if (!path) {
+                errno = ENOMEM;
+                return -1;
+        }
+
+        cmd_args->volfile = path;
+
+        return 0;
+}
+
+
+/*
+ * glfs_set_volfile_server: record the management server to fetch the
+ * volfile from.
+ *
+ * @transport may be NULL, in which case "tcp" is stored, as documented in
+ * glfs.h, so later code never sees a NULL transport string.
+ * @host must not be NULL: glfs_volumes_init() treats a missing
+ * volfile_server as "nothing configured".
+ *
+ * Returns 0 on success. Returns -1 with errno set to:
+ *   EINVAL - @host is NULL, or a volfile / volfile server has already been
+ *            configured on @fs;
+ *   ENOMEM - a string could not be duplicated.
+ * cmd_args is left untouched on failure.
+ */
+int
+glfs_set_volfile_server (struct glfs *fs, const char *transport,
+                         const char *host, int port)
+{
+        cmd_args_t *cmd_args       = NULL;
+        char       *host_copy      = NULL;
+        char       *transport_copy = NULL;
+
+        if (!host) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        cmd_args = &fs->ctx->cmd_args;
+
+        if (vol_assigned (cmd_args)) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        /* duplicate both strings first so a failure changes nothing */
+        host_copy = gf_strdup (host);
+        transport_copy = gf_strdup (transport ? transport : "tcp");
+        if (!host_copy || !transport_copy) {
+                GF_FREE (host_copy);
+                GF_FREE (transport_copy);
+                errno = ENOMEM;
+                return -1;
+        }
+
+        cmd_args->volfile_server = host_copy;
+        cmd_args->volfile_server_transport = transport_copy;
+        cmd_args->volfile_server_port = port;
+        cmd_args->max_connect_attempts = 2;
+
+        return 0;
+}
+
+
+/*
+ * glfs_set_logging: select the log file and, optionally, the log level.
+ *
+ * With a NULL @logfile the path is derived via gf_set_log_file_path() and
+ * read back from cmd_args.log_file. The level is only changed when
+ * @loglevel is >= 0. Returns 0 on success, otherwise the non-zero result
+ * of the step that failed.
+ */
+int
+glfs_set_logging (struct glfs *fs, const char *logfile, int loglevel)
+{
+        char *path = (char *)logfile;
+        int   rc   = 0;
+
+        if (path == NULL) {
+                rc = gf_set_log_file_path (&fs->ctx->cmd_args);
+                if (rc)
+                        return rc;
+                path = fs->ctx->cmd_args.log_file;
+        }
+
+        rc = gf_log_init (fs->ctx, path, NULL);
+        if (rc)
+                return rc;
+
+        if (loglevel >= 0)
+                gf_log_set_loglevel (loglevel);
+
+        return rc;
+}
+
+
+/*
+ * glfs_init_wait: block until glfs_init_done() has marked @fs initialised.
+ *
+ * Waits on fs->cond until fs->init is set, then returns fs->ret and
+ * copies fs->err into errno.
+ */
+int
+glfs_init_wait (struct glfs *fs)
+{
+        int outcome = -1;
+
+        /* Always a top-down call, use glfs_lock() */
+        glfs_lock (fs);
+
+        while (!fs->init)
+                pthread_cond_wait (&fs->cond, &fs->mutex);
+
+        outcome = fs->ret;
+        errno = fs->err;
+
+        glfs_unlock (fs);
+
+        return outcome;
+}
+
+
+/*
+ * glfs_init_done: publish the result of initialisation.
+ *
+ * Stores @ret and the current errno in @fs and marks it initialised under
+ * fs->mutex. If no init callback is registered, waiters on fs->cond are
+ * woken; otherwise the callback is invoked with (@fs, @ret) after the
+ * mutex has been released. A NULL @fs is logged and ignored.
+ */
+void
+glfs_init_done (struct glfs *fs, int ret)
+{
+        glfs_init_cbk notify;
+
+        if (fs == NULL) {
+                gf_log ("glfs", GF_LOG_ERROR,
+                        "fs is NULL");
+                return;
+        }
+
+        notify = fs->init_cbk;
+
+        /* Always a bottom-up call, use mutex_lock() */
+        pthread_mutex_lock (&fs->mutex);
+
+        fs->init = 1;
+        fs->ret = ret;
+        fs->err = errno;
+
+        if (notify == NULL)
+                pthread_cond_broadcast (&fs->cond);
+
+        pthread_mutex_unlock (&fs->mutex);
+
+        if (notify != NULL)
+                notify (fs, ret);
+}
+
+
+/*
+ * glfs_init_common: shared first half of glfs_init()/glfs_init_async().
+ *
+ * Creates the master xlator, starts the poller thread and kicks off volume
+ * initialisation, stopping at the first step that returns non-zero. Once
+ * all three succeed, fs->dev_id is derived from a hash of the volume name.
+ * Returns 0 on success or the failing step's return value.
+ */
+int
+glfs_init_common (struct glfs *fs)
+{
+        int rc = -1;
+
+        rc = create_master (fs);
+
+        if (rc == 0)
+                rc = gf_thread_create (&fs->poller, NULL, glfs_poller, fs);
+
+        if (rc == 0)
+                rc = glfs_volumes_init (fs);
+
+        if (rc == 0)
+                fs->dev_id = gf_dm_hashfn (fs->volname, strlen (fs->volname));
+
+        return rc;
+}
+
+
+/*
+ * glfs_init_async: register @cbk as the init callback and start
+ * initialisation without waiting for it to finish. Returns the result of
+ * glfs_init_common().
+ */
+int
+glfs_init_async (struct glfs *fs, glfs_init_cbk cbk)
+{
+        fs->init_cbk = cbk;
+
+        return glfs_init_common (fs);
+}
+
+
+/*
+ * glfs_init: start initialisation and wait for it to complete.
+ *
+ * Returns the non-zero result of glfs_init_common() if start-up fails,
+ * otherwise the result of glfs_init_wait().
+ */
+int
+glfs_init (struct glfs *fs)
+{
+        int rc = glfs_init_common (fs);
+
+        if (rc != 0)
+                return rc;
+
+        return glfs_init_wait (fs);
+}
+
+
+/*
+ * glfs_fini: tear down the 'virtual mount' @fs.
+ *
+ * Disables the management rpc client (if any), polls the call pool up to
+ * 100 times at 100000 us intervals for outstanding frames to drain, sends
+ * GF_EVENT_PARENT_DOWN to the active subvol, and closes the log file.
+ *
+ * Returns 0 if the call pool was empty when polling stopped, -1 otherwise.
+ * The remaining teardown steps run in both cases.
+ */
+int
+glfs_fini (struct glfs *fs)
+{
+ int ret = -1;
+ int countdown = 100;
+ xlator_t *subvol = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_pool_t *call_pool = NULL;
+
+ ctx = fs->ctx;
+
+ if (ctx->mgmt) {
+ rpc_clnt_disable (ctx->mgmt);
+ ctx->mgmt = NULL;
+ }
+
+ __glfs_entry_fs (fs);
+
+ call_pool = fs->ctx->pool;
+
+ while (countdown--) {
+ /* give some time for background frames to finish */
+ if (!call_pool->cnt)
+ break;
+ usleep (100000);
+ }
+ /* leaked frames may exist, we ignore */
+
+ /* We deem glfs_fini as successful if there are no pending frames in
+    the call pool */
+ ret = (call_pool->cnt == 0)? 0: -1;
+
+ subvol = glfs_active_subvol (fs);
+ if (subvol) {
+ /* PARENT_DOWN within glfs_subvol_done() is issued only
+ on graph switch (new graph should activate and
+ decrement the extra @winds count taken in glfs_graph_setup())
+
+ Since we are explicitly destroying, PARENT_DOWN is necessary
+ */
+ xlator_notify (subvol, GF_EVENT_PARENT_DOWN, subvol, 0);
+ /* TBD: wait for CHILD_DOWN before exiting, in case of
+ asynchronous cleanup like graceful socket disconnection
+ in the future.
+ */
+ }
+
+ /* called even when subvol is NULL */
+ glfs_subvol_done (fs, subvol);
+
+ if (ctx->log.logfile)
+ fclose (ctx->log.logfile);
+
+ return ret;
+}
diff --git a/api/src/glfs.h b/api/src/glfs.h
new file mode 100644
index 000000000..18fda496e
--- /dev/null
+++ b/api/src/glfs.h
@@ -0,0 +1,581 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+
+#ifndef _GLFS_H
+#define _GLFS_H
+
+/*
+ Enforce the following flags as libgfapi is built
+ with them, and we want programs linking against them to also
+ be built with these flags. This is necessary as it affects
+ some of the structures defined in libc headers (like struct stat)
+ and those definitions need to be consistently compiled in
+ both the library and the application.
+*/
+
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+#ifndef __USE_FILE_OFFSET64
+#define __USE_FILE_OFFSET64
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <sys/cdefs.h>
+#include <dirent.h>
+#include <sys/statvfs.h>
+
+__BEGIN_DECLS
+
+/* The filesystem object. One object per 'virtual mount' */
+struct glfs;
+typedef struct glfs glfs_t;
+
+
+/*
+ SYNOPSIS
+
+ glfs_new: Create a new 'virtual mount' object.
+
+ DESCRIPTION
+
+ This is most likely the very first function you will use. This function
+ will create a new glfs_t (virtual mount) object in memory.
+
+ On this newly created glfs_t, you need to either set a volfile path
+ (glfs_set_volfile) or a volfile server (glfs_set_volfile_server).
+
+ The glfs_t object needs to be initialized with glfs_init() before you
+ can start issuing file operations on it.
+
+ PARAMETERS
+
+ @volname: Name of the volume. This identifies the server-side volume and
+ the fetched volfile (equivalent of --volfile-id command line
+ parameter to glusterfsd). When used with glfs_set_volfile() the
+ @volname has no effect (except for appearing in log messages).
+
+ RETURN VALUES
+
+ NULL : Out of memory condition.
+ Others : Pointer to the newly created glfs_t virtual mount object.
+
+*/
+
+glfs_t *glfs_new (const char *volname);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile: Specify the path to the volume specification file.
+
+ DESCRIPTION
+
+ If you are using a static volume specification file (without dynamic
+ volume management abilities from the CLI), then specify the path to
+ the volume specification file.
+
+ This is incompatible with glfs_set_volfile_server().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @volfile: Path to the locally available volume specification file.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_volfile (glfs_t *fs, const char *volfile);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile_server: Specify the address of management server.
+
+ DESCRIPTION
+
+ This function specifies the address of the management server (glusterd)
+ to connect, and establish the volume configuration. The @volname
+ parameter passed to glfs_new() is the volume which will be virtually
+ mounted as the glfs_t object. All operations performed by the CLI at
+ the management server will automatically be reflected in the 'virtual
+ mount' object as it maintains a connection to glusterd and polls on
+ configuration change notifications.
+
+ This is incompatible with glfs_set_volfile().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @transport: String specifying the transport used to connect to the
+ management daemon. Specifying NULL will result in the usage
+ of the default (tcp) transport type. Permitted values
+ are those that you specify as transport-type in a volume
+ specification file (e.g "tcp", "rdma", "unix".)
+
+ @host: String specifying the address of where to find the management
+ daemon. Depending on the transport type this would either be
+ an FQDN (e.g: "storage01.company.com"), ASCII encoded IP
+ address "192.168.22.1", or a UNIX domain socket path (e.g
+ "/tmp/glusterd.socket".)
+
+ @port: The TCP port number where gluster management daemon is listening.
+ Specifying 0 uses the default port number GF_DEFAULT_BASE_PORT.
+ This parameter is unused if you are using a UNIX domain socket.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_volfile_server (glfs_t *fs, const char *transport,
+ const char *host, int port);
+
+
+/*
+ SYNOPSIS
+
+ glfs_set_logging: Specify logging parameters.
+
+ DESCRIPTION
+
+ This function specifies logging parameters for the virtual mount.
+ Default log file is /dev/null.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the logging parameters.
+
+ @logfile: The logfile to be used for logging. Will be created if it does not
+ already exist (provided system permissions allow). If NULL, a new
+ logfile will be created in default log directory associated with
+ the glusterfs installation.
+
+ @loglevel: Numerical value specifying the degree of verbosity. Higher the
+ value, more verbose the logging.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_set_logging (glfs_t *fs, const char *logfile, int loglevel);
+
+
+/*
+ SYNOPSIS
+
+ glfs_init: Initialize the 'virtual mount'
+
+ DESCRIPTION
+
+ This function initializes the glfs_t object. This consists of many steps:
+ - Spawn a poll-loop thread.
+ - Establish connection to management daemon and receive volume specification.
+ - Construct translator graph and initialize graph.
+ - Wait for initialization (connecting to all bricks) to complete.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int glfs_init (glfs_t *fs);
+
+
+/*
+ SYNOPSIS
+
+ glfs_fini: Cleanup and destroy the 'virtual mount'
+
+ DESCRIPTION
+
+ This function attempts to gracefully destroy glfs_t object. An attempt is
+ made to wait for all background processing to complete before returning.
+
+ glfs_fini() must be called after all operations on glfs_t is finished.
+
+ IMPORTANT
+
+ IT IS NECESSARY TO CALL glfs_fini() ON ALL THE INITIALIZED glfs_t
+ OBJECTS BEFORE TERMINATING THE PROGRAM. THERE MAY BE CACHED AND
+ UNWRITTEN / INCOMPLETE OPERATIONS STILL IN PROGRESS EVEN THOUGH THE
+ API CALLS HAVE RETURNED. glfs_fini() WILL WAIT FOR BACKGROUND OPERATIONS
+ TO COMPLETE BEFORE RETURNING, THEREBY MAKING IT SAFE FOR THE PROGRAM TO
+ EXIT.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be destroyed.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Call frames were still pending in the call pool after the wait.
+*/
+
+int glfs_fini (glfs_t *fs);
+
+/*
+ * FILE OPERATION
+ *
+ * What follows are filesystem operations performed on the
+ * 'virtual mount'. The calls here are kept as close to
+ * the POSIX system calls as possible.
+ *
+ * Notes:
+ *
+ * - All paths specified, even if absolute, are relative to the
+ * root of the virtual mount and not the system root (/).
+ *
+ */
+
+/* The file descriptor object. One per open file/directory. */
+
+struct glfs_fd;
+typedef struct glfs_fd glfs_fd_t;
+
+/*
+ * PER THREAD IDENTITY MODIFIERS
+ *
+ * The following operations enable to set a per thread identity context
+ * for the glfs APIs to perform operations as. The calls here are kept as close
+ * to POSIX equivalents as possible.
+ *
+ * NOTES:
+ *
+ * - setgroups is a per thread setting, hence this is named as fsgroups to be
+ * close in naming to the fs(u/g)id APIs
+ * - Typical mode of operation is to set the IDs as required, with the
+ * supplementary groups being optionally set, make the glfs call and post the
+ * glfs operation set them back to eu/gid or uid/gid as appropriate to the
+ * caller
+ * - The groups once set, need to be unset by setting the size to 0 (in which
+ * case the list argument is a do not care)
+ * - Once a process for a thread of operation chooses to set the IDs, all glfs
+ * calls made from that thread would default to the IDs set for the thread.
+ * As a result use these APIs with care and ensure that the set IDs are
+ * reverted to global process defaults as required.
+ *
+ */
+int glfs_setfsuid (uid_t fsuid);
+int glfs_setfsgid (gid_t fsgid);
+int glfs_setfsgroups (size_t size, const gid_t *list);
+
+/*
+ SYNOPSIS
+
+ glfs_open: Open a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object on which the file is to be opened.
+
+ @path: Path of the file within the virtual mount.
+
+ @flags: Open flags. See open(2). O_CREAT is not supported.
+ Use glfs_creat() for creating files.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *glfs_open (glfs_t *fs, const char *path, int flags);
+
+
+/*
+ SYNOPSIS
+
+ glfs_creat: Create a file.
+
+ DESCRIPTION
+
+ This function creates and opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object on which the file is to be created.
+
+ @path: Path of the file within the virtual mount.
+
+ @mode: Permission of the file to be created.
+
+ @flags: Create flags. See open(2). O_EXCL is supported.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *glfs_creat (glfs_t *fs, const char *path, int flags,
+ mode_t mode);
+
+int glfs_close (glfs_fd_t *fd);
+
+glfs_t *glfs_from_glfd (glfs_fd_t *fd);
+
+int glfs_set_xlator_option (glfs_t *fs, const char *xlator, const char *key,
+ const char *value);
+
+/*
+
+ glfs_io_cbk
+
+ The following is the function type definition of the callback
+ function pointer which has to be provided by the caller to the
+ *_async() versions of the IO calls.
+
+ The callback function is called on completion of the requested
+ IO, and the appropriate return value is returned in @ret.
+
+ In case of an error in completing the IO, @ret will be -1 and
+ @errno will be set with the appropriate error.
+
+ @ret will be same as the return value of the non _async() variant
+ of the particular call
+
+ @data is the same context pointer provided by the caller at the
+ time of issuing the async IO call. This can be used by the
+ caller to differentiate different instances of the async requests
+ in a common callback function.
+*/
+
+typedef void (*glfs_io_cbk) (glfs_fd_t *fd, ssize_t ret, void *data);
+
+// glfs_{read,write}[_async]
+
+ssize_t glfs_read (glfs_fd_t *fd, void *buf, size_t count, int flags);
+ssize_t glfs_write (glfs_fd_t *fd, const void *buf, size_t count, int flags);
+int glfs_read_async (glfs_fd_t *fd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data);
+int glfs_write_async (glfs_fd_t *fd, const void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data);
+
+// glfs_{read,write}v[_async]
+
+ssize_t glfs_readv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags);
+ssize_t glfs_writev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags);
+int glfs_readv_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data);
+int glfs_writev_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data);
+
+// glfs_p{read,write}[_async]
+
+ssize_t glfs_pread (glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags);
+ssize_t glfs_pwrite (glfs_fd_t *fd, const void *buf, size_t count,
+ off_t offset, int flags);
+int glfs_pread_async (glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data);
+int glfs_pwrite_async (glfs_fd_t *fd, const void *buf, int count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data);
+
+// glfs_p{read,write}v[_async]
+
+ssize_t glfs_preadv (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ off_t offset, int flags);
+ssize_t glfs_pwritev (glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ off_t offset, int flags);
+int glfs_preadv_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data);
+int glfs_pwritev_async (glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data);
+
+
+off_t glfs_lseek (glfs_fd_t *fd, off_t offset, int whence);
+
+int glfs_truncate (glfs_t *fs, const char *path, off_t length);
+
+int glfs_ftruncate (glfs_fd_t *fd, off_t length);
+int glfs_ftruncate_async (glfs_fd_t *fd, off_t length, glfs_io_cbk fn,
+ void *data);
+
+int glfs_lstat (glfs_t *fs, const char *path, struct stat *buf);
+int glfs_stat (glfs_t *fs, const char *path, struct stat *buf);
+int glfs_fstat (glfs_fd_t *fd, struct stat *buf);
+
+int glfs_fsync (glfs_fd_t *fd);
+int glfs_fsync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data);
+
+int glfs_fdatasync (glfs_fd_t *fd);
+int glfs_fdatasync_async (glfs_fd_t *fd, glfs_io_cbk fn, void *data);
+
+int glfs_access (glfs_t *fs, const char *path, int mode);
+
+int glfs_symlink (glfs_t *fs, const char *oldpath, const char *newpath);
+
+int glfs_readlink (glfs_t *fs, const char *path, char *buf, size_t bufsiz);
+
+int glfs_mknod (glfs_t *fs, const char *path, mode_t mode, dev_t dev);
+
+int glfs_mkdir (glfs_t *fs, const char *path, mode_t mode);
+
+int glfs_unlink (glfs_t *fs, const char *path);
+
+int glfs_rmdir (glfs_t *fs, const char *path);
+
+int glfs_rename (glfs_t *fs, const char *oldpath, const char *newpath);
+
+int glfs_link (glfs_t *fs, const char *oldpath, const char *newpath);
+
+glfs_fd_t *glfs_opendir (glfs_t *fs, const char *path);
+
+/*
+ * @glfs_readdir_r and @glfs_readdirplus_r ARE thread safe AND re-entrant,
+ * but the interface has ambiguity about the size of @dirent to be allocated
+ * before calling the APIs. 512 byte buffer (for @dirent) is sufficient for
+ * all known systems which are tested against glusterfs/gfapi, but may be
+ * insufficient in the future.
+ */
+
+int glfs_readdir_r (glfs_fd_t *fd, struct dirent *dirent,
+ struct dirent **result);
+
+int glfs_readdirplus_r (glfs_fd_t *fd, struct stat *stat, struct dirent *dirent,
+ struct dirent **result);
+
+/*
+ * @glfs_readdir and @glfs_readdirplus are NEITHER thread safe NOR re-entrant
+ * when called on the same directory handle. However they ARE thread safe
+ * AND re-entrant when called on different directory handles (which may be
+ * referring to the same directory too.)
+ */
+
+struct dirent *glfs_readdir (glfs_fd_t *fd);
+
+struct dirent *glfs_readdirplus (glfs_fd_t *fd, struct stat *stat);
+
+long glfs_telldir (glfs_fd_t *fd);
+
+void glfs_seekdir (glfs_fd_t *fd, long offset);
+
+int glfs_closedir (glfs_fd_t *fd);
+
+int glfs_statvfs (glfs_t *fs, const char *path, struct statvfs *buf);
+
+int glfs_chmod (glfs_t *fs, const char *path, mode_t mode);
+
+int glfs_fchmod (glfs_fd_t *fd, mode_t mode);
+
+int glfs_chown (glfs_t *fs, const char *path, uid_t uid, gid_t gid);
+
+int glfs_lchown (glfs_t *fs, const char *path, uid_t uid, gid_t gid);
+
+int glfs_fchown (glfs_fd_t *fd, uid_t uid, gid_t gid);
+
+int glfs_utimens (glfs_t *fs, const char *path, struct timespec times[2]);
+
+int glfs_lutimens (glfs_t *fs, const char *path, struct timespec times[2]);
+
+int glfs_futimens (glfs_fd_t *fd, struct timespec times[2]);
+
+ssize_t glfs_getxattr (glfs_t *fs, const char *path, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_lgetxattr (glfs_t *fs, const char *path, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_fgetxattr (glfs_fd_t *fd, const char *name,
+ void *value, size_t size);
+
+ssize_t glfs_listxattr (glfs_t *fs, const char *path, void *value, size_t size);
+
+ssize_t glfs_llistxattr (glfs_t *fs, const char *path, void *value,
+ size_t size);
+
+ssize_t glfs_flistxattr (glfs_fd_t *fd, void *value, size_t size);
+
+int glfs_setxattr (glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_lsetxattr (glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_fsetxattr (glfs_fd_t *fd, const char *name,
+ const void *value, size_t size, int flags);
+
+int glfs_removexattr (glfs_t *fs, const char *path, const char *name);
+
+int glfs_lremovexattr (glfs_t *fs, const char *path, const char *name);
+
+int glfs_fremovexattr (glfs_fd_t *fd, const char *name);
+
+int glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len);
+
+int glfs_discard(glfs_fd_t *fd, off_t offset, size_t len);
+
+
+int glfs_discard_async (glfs_fd_t *fd, off_t length, size_t lent,
+ glfs_io_cbk fn, void *data);
+
+int glfs_zerofill(glfs_fd_t *fd, off_t offset, size_t len);
+
+int glfs_zerofill_async (glfs_fd_t *fd, off_t length, size_t len,
+ glfs_io_cbk fn, void *data);
+
+char *glfs_getcwd (glfs_t *fs, char *buf, size_t size);
+
+int glfs_chdir (glfs_t *fs, const char *path);
+
+int glfs_fchdir (glfs_fd_t *fd);
+
+char *glfs_realpath (glfs_t *fs, const char *path, char *resolved_path);
+
+/*
+ * @cmd and @flock are as specified in man fcntl(2).
+ */
+int glfs_posix_lock (glfs_fd_t *fd, int cmd, struct flock *flock);
+
+glfs_fd_t *glfs_dup (glfs_fd_t *fd);
+
+__END_DECLS
+
+#endif /* !_GLFS_H */
diff --git a/argp-standalone/configure.ac b/argp-standalone/configure.ac
index 65ebc4518..2ecd2a801 100644
--- a/argp-standalone/configure.ac
+++ b/argp-standalone/configure.ac
@@ -8,7 +8,7 @@ AC_CONFIG_SRCDIR([argp-ba.c])
AC_CONFIG_AUX_DIR([.])
AM_INIT_AUTOMAKE
-AM_CONFIG_HEADER(config.h)
+AC_CONFIG_HEADERS(config.h)
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
@@ -22,7 +22,7 @@ AC_GNU_SOURCE
AC_PROG_CC
AC_PROG_MAKE_SET
AC_PROG_RANLIB
-AM_PROG_CC_STDC
+AC_PROG_CC
if test "x$am_cv_prog_cc_stdc" = xno ; then
AC_ERROR([the C compiler doesn't handle ANSI-C])
diff --git a/autogen.sh b/autogen.sh
index e20408bf2..f937e6be0 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,8 +1,105 @@
#!/bin/sh
-aclocal
-autoheader
-(libtoolize --automake --copy --force || glibtoolize --automake --copy --force)
-autoconf
-automake --add-missing --copy --foreign
+echo
+echo ... GlusterFS autogen ...
+echo
+
+## Check all dependencies are present
+MISSING=""
+
+# Check for aclocal
+env aclocal --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ ACLOCAL=aclocal
+else
+ MISSING="$MISSING aclocal"
+fi
+
+# Check for autoconf
+env autoconf --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOCONF=autoconf
+else
+ MISSING="$MISSING autoconf"
+fi
+
+# Check for autoheader
+env autoheader --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOHEADER=autoheader
+else
+ MISSING="$MISSING autoheader"
+fi
+
+# Check for automake
+env automake --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOMAKE=automake
+else
+ MISSING="$MISSING automake"
+fi
+
+# Check for libtoolize or glibtoolize
+env libtoolize --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ # libtoolize was found, so use it
+ TOOL=libtoolize
+else
+ # libtoolize wasn't found, so check for glibtoolize
+ env glibtoolize --version > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ TOOL=glibtoolize
+ else
+ MISSING="$MISSING libtoolize/glibtoolize"
+ fi
+fi
+
+# Check for tar
+env tar --version > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ MISSING="$MISSING tar"
+fi
+
+## If dependencies are missing, warn the user and abort
+if [ "x$MISSING" != "x" ]; then
+ echo "Aborting."
+ echo
+ echo "The following build tools are missing:"
+ echo
+ for pkg in $MISSING; do
+ echo " * $pkg"
+ done
+ echo
+ echo "Please install them and try again."
+ echo
+ exit 1
+fi
+
+## generate gf-error-codes.h from error-codes.json
+echo "Generate gf-error-codes.h ..."
+if ./gen-headers.py; then
+ if ! mv -fv gf-error-codes.h libglusterfs/src/gf-error-codes.h; then
+ exit 1
+ fi
+else
+ exit 1
+fi
+
+## Do the autogeneration
+echo Running ${ACLOCAL}...
+$ACLOCAL -I ./contrib/aclocal
+echo Running ${AUTOHEADER}...
+$AUTOHEADER
+echo Running ${TOOL}...
+$TOOL --automake --copy --force
+echo Running ${AUTOCONF}...
+$AUTOCONF
+echo Running ${AUTOMAKE}...
+$AUTOMAKE --add-missing --copy --foreign
+
+# Run autogen in the argp-standalone sub-directory
cd argp-standalone;./autogen.sh
+
+# Instruct user on next steps
+echo
+echo "Please proceed with configuring, compiling, and installing."
diff --git a/booster/Makefile.am b/booster/Makefile.am
deleted file mode 100644
index e1c45f305..000000000
--- a/booster/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS=src \ No newline at end of file
diff --git a/booster/src/Makefile.am b/booster/src/Makefile.am
deleted file mode 100644
index d7d83abf5..000000000
--- a/booster/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-ldpreload_LTLIBRARIES = libglusterfs-booster.la
-ldpreloaddir = $(libdir)/glusterfs
-noinst_HEADERS = booster_fstab.h booster-fd.h
-libglusterfs_booster_la_SOURCES = booster.c booster_stat.c booster_fstab.c booster-fd.c
-libglusterfs_booster_la_CFLAGS = -I$(top_srcdir)/libglusterfsclient/src/ -D_GNU_SOURCE -D$(GF_HOST_OS) -fPIC -Wall \
- -pthread $(GF_BOOSTER_CFLAGS) -shared -nostartfiles
-libglusterfs_booster_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
- -I$(top_srcdir)/libglusterfsclient/src \
- -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(ARGP_STANDALONE_CPPFLAGS)
-
-libglusterfs_booster_la_LDFLAGS = -module -avoidversion
-libglusterfs_booster_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
-
-CLEANFILES =
-
-uninstall-local:
- rm -f $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
-
-install-data-hook:
- ln -sf libglusterfs-booster.so $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
diff --git a/booster/src/booster-fd.c b/booster/src/booster-fd.c
deleted file mode 100644
index fa5b0cde2..000000000
--- a/booster/src/booster-fd.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-#include "booster-fd.h"
-#include <logging.h>
-#include <mem-pool.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <common-utils.h>
-#include <string.h>
-
-#include <assert.h>
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-/*
- Allocate in memory chunks of power of 2 starting from 1024B
- Assumes fdtable->lock is held
- */
-static inline uint
-gf_roundup_power_of_two (uint nr)
-{
- uint result = 1;
-
- if (nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Negative number passed");
- return -1;
- }
-
- while (result <= nr)
- result *= 2;
-
- return result;
-}
-
-#define BOOSTER_NFDBITS (sizeof (unsigned long))
-
-#define BOOSTER_FDMASK(d) (1UL << ((d) % BOOSTER_NFDBITS))
-#define BOOSTER_FDELT(d) (d / BOOSTER_NFDBITS)
-#define BOOSTER_FD_SET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] |= BOOSTER_FDMASK(d))
-#define BOOSTER_FD_CLR(set, d) (set->fd_bits[BOOSTER_FDELT(d)] &= ~BOOSTER_FDMASK(d))
-#define BOOSTER_FD_ISSET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] & BOOSTER_FDMASK(d))
-
-inline int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- return BOOSTER_FD_ISSET(fdtable->close_on_exec, fd);
-}
-
-inline void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- BOOSTER_FD_SET(fdtable->close_on_exec, fd);
-}
-
-int
-booster_fdtable_expand (booster_fdtable_t *fdtable, uint nr)
-{
- fd_t **oldfds = NULL, **tmp = NULL;
- uint oldmax_fds = -1;
- uint cpy = 0;
- int32_t ret = -1, bytes = 0;
- booster_fd_set_t *oldclose_on_exec = NULL;
-
- if (fdtable == NULL || nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Invalid argument");
- errno = EINVAL;
- ret = -1;
- goto out;
- }
-
- nr /= (1024 / sizeof (fd_t *));
- nr = gf_roundup_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fd_t *));
-
- oldfds = fdtable->fds;
- oldmax_fds = fdtable->max_fds;
- oldclose_on_exec = fdtable->close_on_exec;
-
- fdtable->fds = CALLOC (nr, sizeof (fd_t *));
- if (fdtable->fds == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation failed");
- fdtable->fds = oldfds;
- oldfds = NULL;
- ret = -1;
- goto out;
- }
-
- fdtable->max_fds = nr;
-
- if (oldfds) {
- cpy = oldmax_fds * sizeof (fd_t *);
- memcpy (fdtable->fds, oldfds, cpy);
- }
-
- /* nr will be either less than 8 or a multiple of 8 */
- bytes = nr/8;
- bytes = bytes ? bytes : 1;
- fdtable->close_on_exec = CALLOC (bytes, 1);
- if (fdtable->close_on_exec == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation "
- "failed");
- tmp = fdtable->fds;
- fdtable->fds = oldfds;
- oldfds = tmp;
- ret = -1;
- goto out;
- }
-
- if (oldclose_on_exec != NULL) {
- bytes = oldmax_fds/8;
- cpy = bytes ? bytes : 1;
- memcpy (fdtable->close_on_exec, oldclose_on_exec, cpy);
- }
- gf_log ("booster-fd", GF_LOG_TRACE, "FD-table expanded: Old: %d,New: %d"
- , oldmax_fds, nr);
- ret = 0;
-
-out:
- FREE (oldfds);
- FREE (oldclose_on_exec);
-
- return ret;
-}
-
-booster_fdtable_t *
-booster_fdtable_alloc (void)
-{
- booster_fdtable_t *fdtable = NULL;
- int32_t ret = -1;
-
- fdtable = CALLOC (1, sizeof (*fdtable));
- GF_VALIDATE_OR_GOTO ("booster-fd", fdtable, out);
-
- LOCK_INIT (&fdtable->lock);
-
- LOCK (&fdtable->lock);
- {
- ret = booster_fdtable_expand (fdtable, 0);
- }
- UNLOCK (&fdtable->lock);
-
- if (ret == -1) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD-table allocation "
- "failed");
- FREE (fdtable);
- fdtable = NULL;
- }
-
-out:
- return fdtable;
-}
-
-fd_t **
-__booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
-
- if (count == NULL)
- goto out;
-
- fds = fdtable->fds;
- fdtable->fds = calloc (fdtable->max_fds, sizeof (fd_t *));
- *count = fdtable->max_fds;
-
-out:
- return fds;
-}
-
-fd_t **
-booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
- if (!fdtable)
- return NULL;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, count);
- }
- UNLOCK (&fdtable->lock);
-
- return fds;
-}
-
-void
-booster_fdtable_destroy (booster_fdtable_t *fdtable)
-{
- fd_t *fd = NULL;
- fd_t **fds = NULL;
- uint fd_count = 0;
- int i = 0;
-
- if (!fdtable)
- return;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, &fd_count);
- FREE (fdtable->fds);
- }
- UNLOCK (&fdtable->lock);
-
- if (!fds)
- goto free_table;
-
- for (i = 0; i < fd_count; i++) {
- fd = fds[i];
- if (fd != NULL)
- fd_unref (fd);
- }
- FREE (fds);
-free_table:
- LOCK_DESTROY (&fdtable->lock);
- FREE (fdtable);
-}
-
-int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd)
-{
- int ret = -1;
- int error = 0;
-
- if (fdtable == NULL || fdptr == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return -1;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "Requested fd: %d", fd);
- LOCK (&fdtable->lock);
- {
- while (fdtable->max_fds < fd) {
- error = 0;
- error = booster_fdtable_expand (fdtable,
- fdtable->max_fds + 1);
- if (error) {
- gf_log ("booster-fd", GF_LOG_ERROR,
- "Cannot expand fdtable:%s",
- strerror (error));
- goto err;
- }
- }
-
- if (!fdtable->fds[fd]) {
- fdtable->fds[fd] = fdptr;
- fd_ref (fdptr);
- ret = fd;
- } else
- gf_log ("booster-fd", GF_LOG_ERROR, "Cannot allocate fd"
- " %d (slot not empty in fdtable)", fd);
- }
-err:
- UNLOCK (&fdtable->lock);
-
- return ret;
-}
-
-void
-booster_fd_put (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD put: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- return;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- fdtable->fds[fd] = NULL;
- }
- UNLOCK (&fdtable->lock);
-
- if (fdptr)
- fd_unref (fdptr);
-}
-
-fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
-
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD ptr request: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- errno = EINVAL;
- return NULL;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- if (fdptr)
- fd_ref (fdptr);
- }
- UNLOCK (&fdtable->lock);
-
- return fdptr;
-}
-
-void
-booster_fdptr_put (fd_t *booster_fd)
-{
- if (booster_fd)
- fd_unref (booster_fd);
-}
diff --git a/booster/src/booster-fd.h b/booster/src/booster-fd.h
deleted file mode 100644
index 595a112bd..000000000
--- a/booster/src/booster-fd.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _BOOSTER_FD_H
-#define _BOOSTER_FD_H
-
-#include <libglusterfsclient.h>
-#include <locking.h>
-#include <list.h>
-
-/* This struct must be updated if the fd_t in fd.h changes.
- * We cannot include those headers here because unistd.h, included
- * by glusterfs headers, conflicts with the syscall prototypes we
- * define for booster.
- */
-struct _fd {
- pid_t pid;
- int32_t flags;
- int32_t refcount;
- struct list_head inode_list;
- struct _inode *inode;
- struct _dict *ctx;
- gf_lock_t lock; /* used ONLY for manipulating
- 'struct _fd_ctx' array (_ctx).*/
- struct _fd_ctx *_ctx;
-};
-typedef struct _fd fd_t;
-
-struct _booster_fd_set {
- unsigned long fd_bits[0];
-};
-typedef struct _booster_fd_set booster_fd_set_t;
-
-struct _booster_fdtable {
- booster_fd_set_t *close_on_exec;
- int refcount;
- unsigned int max_fds;
- gf_lock_t lock;
- fd_t **fds;
-};
-typedef struct _booster_fdtable booster_fdtable_t;
-
-void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-extern int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd);
-
-extern void
-booster_fd_put (booster_fdtable_t *fdtable, int fd);
-
-extern fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd);
-
-extern void
-booster_fdptr_put (fd_t *fdptr);
-
-extern void
-booster_fdtable_destroy (booster_fdtable_t *fdtable);
-
-booster_fdtable_t *
-booster_fdtable_alloc (void);
-
-#endif /* #ifndef _BOOSTER_FD_H */
diff --git a/booster/src/booster.c b/booster/src/booster.c
deleted file mode 100644
index c34ec1146..000000000
--- a/booster/src/booster.c
+++ /dev/null
@@ -1,3172 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <libglusterfsclient.h>
-#include <list.h>
-#include <pthread.h>
-#include <sys/xattr.h>
-#include <string.h>
-#include <assert.h>
-#include <errno.h>
-#include <ctype.h>
-#include <logging.h>
-#include <utime.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <fcntl.h>
-#include "booster-fd.h"
-
-#ifndef GF_UNIT_KB
-#define GF_UNIT_KB 1024
-#endif
-
-static pthread_mutex_t cwdlock = PTHREAD_MUTEX_INITIALIZER;
-
-/* attr constructor registers this function with libc's
- * _init function as a function that must be called before
- * the main() of the program.
- */
-static void booster_lib_init (void) __attribute__((constructor));
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-
-extern int pipe (int filedes[2]);
-/* We define these flags so that we can remove fcntl.h from the include path.
- * fcntl.h has certain defines and other lines of code that redirect the
- * application's open and open64 calls to the syscalls defined by
- * libc, for us, thats not a Good Thing (TM).
- */
-#ifndef GF_O_CREAT
-#define GF_O_CREAT 0x40
-#endif
-
-#ifndef GF_O_TRUNC
-#define GF_O_TRUNC 0x200
-#endif
-
-#ifndef GF_O_RDWR
-#define GF_O_RDWR 0x2
-#endif
-
-#ifndef GF_O_WRONLY
-#define GF_O_WRONLY 0x1
-#endif
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
-typedef enum {
- BOOSTER_OPEN,
- BOOSTER_CREAT
-} booster_op_t;
-
-struct _inode;
-struct _dict;
-
-ssize_t
-write (int fd, const void *buf, size_t count);
-
-/* open, open64, creat */
-static int (*real_open) (const char *pathname, int flags, ...);
-static int (*real_open64) (const char *pathname, int flags, ...);
-static int (*real_creat) (const char *pathname, mode_t mode);
-static int (*real_creat64) (const char *pathname, mode_t mode);
-
-/* read, readv, pread, pread64 */
-static ssize_t (*real_read) (int fd, void *buf, size_t count);
-static ssize_t (*real_readv) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pread) (int fd, void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pread64) (int fd, void *buf, size_t count,
- uint64_t offset);
-
-/* write, writev, pwrite, pwrite64 */
-static ssize_t (*real_write) (int fd, const void *buf, size_t count);
-static ssize_t (*real_writev) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pwrite) (int fd, const void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pwrite64) (int fd, const void *buf, size_t count,
- uint64_t offset);
-
-/* lseek, llseek, lseek64 */
-static off_t (*real_lseek) (int fildes, unsigned long offset, int whence);
-static off_t (*real_lseek64) (int fildes, uint64_t offset, int whence);
-
-/* close */
-static int (*real_close) (int fd);
-
-/* dup dup2 */
-static int (*real_dup) (int fd);
-static int (*real_dup2) (int oldfd, int newfd);
-
-static pid_t (*real_fork) (void);
-static int (*real_mkdir) (const char *pathname, mode_t mode);
-static int (*real_rmdir) (const char *pathname);
-static int (*real_chmod) (const char *pathname, mode_t mode);
-static int (*real_chown) (const char *pathname, uid_t owner, gid_t group);
-static int (*real_fchmod) (int fd, mode_t mode);
-static int (*real_fchown) (int fd, uid_t, gid_t gid);
-static int (*real_fsync) (int fd);
-static int (*real_ftruncate) (int fd, off_t length);
-static int (*real_ftruncate64) (int fd, loff_t length);
-static int (*real_link) (const char *oldpath, const char *newname);
-static int (*real_rename) (const char *oldpath, const char *newpath);
-static int (*real_utimes) (const char *path, const struct timeval times[2]);
-static int (*real_utime) (const char *path, const struct utimbuf *buf);
-static int (*real_mknod) (const char *path, mode_t mode, dev_t dev);
-static int (*real_mkfifo) (const char *path, mode_t mode);
-static int (*real_unlink) (const char *path);
-static int (*real_symlink) (const char *oldpath, const char *newpath);
-static int (*real_readlink) (const char *path, char *buf, size_t bufsize);
-static char * (*real_realpath) (const char *path, char *resolved);
-static DIR * (*real_opendir) (const char *path);
-static struct dirent * (*real_readdir) (DIR *dir);
-static struct dirent64 * (*real_readdir64) (DIR *dir);
-static int (*real_readdir_r) (DIR *dir, struct dirent *entry,
- struct dirent **result);
-static int (*real_readdir64_r) (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result);
-static int (*real_closedir) (DIR *dh);
-static int (*real___xstat) (int ver, const char *path, struct stat *buf);
-static int (*real___xstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_stat) (const char *path, struct stat *buf);
-static int (*real_stat64) (const char *path, struct stat64 *buf);
-static int (*real___fxstat) (int ver, int fd, struct stat *buf);
-static int (*real___fxstat64) (int ver, int fd, struct stat64 *buf);
-static int (*real_fstat) (int fd, struct stat *buf);
-static int (*real_fstat64) (int fd , struct stat64 *buf);
-static int (*real___lxstat) (int ver, const char *path, struct stat *buf);
-static int (*real___lxstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_lstat) (const char *path, struct stat *buf);
-static int (*real_lstat64) (const char *path, struct stat64 *buf);
-static int (*real_statfs) (const char *path, struct statfs *buf);
-static int (*real_statfs64) (const char *path, struct statfs64 *buf);
-static int (*real_statvfs) (const char *path, struct statvfs *buf);
-static int (*real_statvfs64) (const char *path, struct statvfs64 *buf);
-static ssize_t (*real_getxattr) (const char *path, const char *name,
- void *value, size_t size);
-static ssize_t (*real_lgetxattr) (const char *path, const char *name,
- void *value, size_t size);
-static int (*real_remove) (const char* path);
-static int (*real_lchown) (const char *path, uid_t owner, gid_t group);
-static void (*real_rewinddir) (DIR *dirp);
-static void (*real_seekdir) (DIR *dirp, off_t offset);
-static off_t (*real_telldir) (DIR *dirp);
-
-static ssize_t (*real_sendfile) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static ssize_t (*real_sendfile64) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static int (*real_fcntl) (int fd, int cmd, ...);
-static int (*real_chdir) (const char *path);
-static int (*real_fchdir) (int fd);
-static char * (*real_getcwd) (char *buf, size_t size);
-static int (*real_truncate) (const char *path, off_t length);
-static int (*real_truncate64) (const char *path, loff_t length);
-static int (*real_setxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_lsetxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_fsetxattr) (int filedes, const char *name,
- const void *value, size_t size, int flags);
-
-
-#define RESOLVE(sym) do { \
- if (!real_##sym) \
- real_##sym = dlsym (RTLD_NEXT, #sym); \
- } while (0)
-
-/*TODO: set proper value */
-#define MOUNT_HASH_SIZE 256
-
-struct booster_mount {
- dev_t st_dev;
- glusterfs_handle_t handle;
- struct list_head device_list;
-};
-typedef struct booster_mount booster_mount_t;
-
-static booster_fdtable_t *booster_fdtable = NULL;
-
-extern int booster_configure (char *confpath);
-/* This is dup'ed every time VMP open/creat wants a new fd.
- * This is needed so we occupy an entry in the process' file
- * table.
- */
-int process_piped_fd = -1;
-
-static int
-booster_get_process_fd ()
-{
- return real_dup (process_piped_fd);
-}
-
-/* The following two define which file contains
- * the FSTAB configuration for VMP-based usage.
- */
-#define DEFAULT_BOOSTER_CONF CONFDIR"/booster.conf"
-#define BOOSTER_CONF_ENV_VAR "GLUSTERFS_BOOSTER_FSTAB"
-
-
-/* The following define which log file is used when
- * using the old mount point bypass approach.
- */
-#define BOOSTER_DEFAULT_LOG CONFDIR"/booster.log"
-#define BOOSTER_LOG_ENV_VAR "GLUSTERFS_BOOSTER_LOG"
-
-void
-do_open (int fd, const char *pathname, int flags, mode_t mode, booster_op_t op)
-{
- char *specfile = NULL;
- char *mount_point = NULL;
- int32_t size = 0;
- int32_t ret = -1;
- FILE *specfp = NULL;
- glusterfs_file_t fh = NULL;
- char *logfile = NULL;
- glusterfs_init_params_t iparams = {
- .loglevel = "error",
- .lookup_timeout = 600,
- .stat_timeout = 600,
- };
-
- gf_log ("booster", GF_LOG_DEBUG, "Opening using MPB: %s", pathname);
- size = fgetxattr (fd, "user.glusterfs-booster-volfile", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfile = calloc (1, size);
- if (!specfile) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-volfile", specfile,
- size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfp = tmpfile ();
- if (!specfp) {
- gf_log ("booster", GF_LOG_ERROR, "Temp file creation failed"
- ": %s", strerror (errno));
- goto out;
- }
-
- ret = fwrite (specfile, size, 1, specfp);
- if (ret != 1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to write volfile: %s",
- strerror (errno));
- goto out;
- }
-
- fseek (specfp, 0L, SEEK_SET);
-
- size = fgetxattr (fd, "user.glusterfs-booster-mount", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- mount_point = calloc (size, sizeof (char));
- if (!mount_point) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-mount", mount_point, size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- logfile = getenv (BOOSTER_LOG_ENV_VAR);
- if (logfile) {
- if (strlen (logfile) > 0)
- iparams.logfile = strdup (logfile);
- else
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- } else {
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Using log-file: %s", iparams.logfile);
- iparams.specfp = specfp;
-
- ret = glusterfs_mount (mount_point, &iparams);
- if (ret == -1) {
- if (errno != EEXIST) {
- gf_log ("booster", GF_LOG_ERROR, "Mount failed over"
- " glusterfs");
- goto out;
- } else
- gf_log ("booster", GF_LOG_ERROR, "Already mounted");
- }
-
- switch (op) {
- case BOOSTER_OPEN:
- gf_log ("booster", GF_LOG_TRACE, "Booster open call");
- fh = glusterfs_open (pathname, flags, mode);
- break;
-
- case BOOSTER_CREAT:
- gf_log ("booster", GF_LOG_TRACE, "Booster create call");
- fh = glusterfs_creat (pathname, mode);
- break;
- }
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Error performing operation");
- goto out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to get unused FD");
- goto out;
- }
- fh = NULL;
-
-out:
- if (specfile) {
- free (specfile);
- }
-
- if (specfp) {
- fclose (specfp);
- }
-
- if (mount_point) {
- free (mount_point);
- }
-
- if (fh) {
- glusterfs_close (fh);
- }
-
- return;
-}
-
-int
-vmp_open (const char *pathname, int flags, ...)
-{
- mode_t mode = 0;
- int fd = -1;
- glusterfs_file_t fh = NULL;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- fh = glusterfs_open (pathname, flags, mode);
- }
- else
- fh = glusterfs_open (pathname, flags);
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "VMP open failed");
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create open fd");
- goto fh_close_out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map fd into table");
- goto realfd_close_out;
- }
-
- return fd;
-
-realfd_close_out:
- real_close (fd);
- fd = -1;
-
-fh_close_out:
- glusterfs_close (fh);
-
-out:
- return fd;
-}
-
-#define BOOSTER_USE_OPEN64 1
-#define BOOSTER_DONT_USE_OPEN64 0
-
-int
-booster_open (const char *pathname, int use64, int flags, ...)
-{
- int ret = -1;
- mode_t mode = 0;
- va_list ap;
- int (*my_open) (const char *pathname, int flags, ...);
-
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Open: %s", pathname);
- /* First try opening through the virtual mount point.
- * The difference lies in the fact that:
- * 1. We depend on libglusterfsclient library to perform
- * the translation from the path to handle.
- * 2. We do not go to the file system for the fd, instead
- * we use booster_get_process_fd (), which returns a dup'ed
- * fd of a pipe created in booster_init.
- */
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- ret = vmp_open (pathname, flags, mode);
- }
- else
- ret = vmp_open (pathname, flags);
-
- /* We receive an ENODEV if the VMP does not exist. If we
- * receive an error other than ENODEV, it means, there
- * actually was an error performing vmp_open. This must
- * be returned to the user.
- */
- if ((ret < 0) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Error in opening file over "
- " VMP: %s", strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File opened");
- goto out;
- }
-
- if (use64) {
- gf_log ("booster", GF_LOG_TRACE, "Using 64-bit open");
- my_open = real_open64;
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Using 32-bit open");
- my_open = real_open;
- }
-
- /* It is possible the RESOLVE macro is not able
- * to resolve the symbol of a function, in that case
- * we dont want to seg-fault on calling a NULL functor.
- */
- if (my_open == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "open not resolved");
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = my_open (pathname, flags, mode);
- } else
- ret = my_open (pathname, flags);
-
- if (ret != -1) {
- do_open (ret, pathname, flags, mode, BOOSTER_OPEN);
- }
-
-out:
- return ret;
-}
-
-/* This is done to over-write existing definitions of open and open64 inside
- * libc with our own copies. __REDIRECT is provided by libc.
- *
- * XXX: This will not work anywhere other than libc based systems.
- */
-int __REDIRECT (booster_false_open, (__const char *__file, int __oflag, ...),
- open) __nonnull ((1));
-int __REDIRECT (booster_false_open64, (__const char *__file, int __oflag, ...),
- open64) __nonnull ((1));
-int
-booster_false_open (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags,
- mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-booster_false_open64 (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags, mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-vmp_creat (const char *pathname, mode_t mode)
-{
- int fd = -1;
- glusterfs_file_t fh = NULL;
-
- fh = glusterfs_creat (pathname, mode);
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Create failed: %s: %s",
- pathname, strerror (errno));
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create fd");
- goto close_out;
- }
-
- if ((booster_fd_unused_get (booster_fdtable, fh, fd)) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map unused fd");
- goto real_close_out;
- }
-
- return fd;
-
-real_close_out:
- real_close (fd);
- fd = -1;
-
-close_out:
- glusterfs_close (fh);
-
-out:
- return -1;
-}
-
-int __REDIRECT (booster_false_creat, (const char *pathname, mode_t mode),
- creat) __nonnull ((1));
-int __REDIRECT (booster_false_creat64, (const char *pathname, mode_t mode),
- creat64) __nonnull ((1));
-
-int
-booster_false_creat (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-int
-booster_false_creat64 (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat64 (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-/* pread */
-
-ssize_t
-pread (int fd, void *buf, size_t count, unsigned long offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread: fd %d, count %lu, offset %lu"
- ,fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pread64 (int fd, void *buf, size_t count, uint64_t offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread64: fd %d, count %lu, offset %"
- PRIu64, fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-read (int fd, void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd;
-
- gf_log ("booster", GF_LOG_TRACE, "read: fd %d, count %lu", fd,
- (long unsigned)count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_read == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_read (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_read (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-readv (int fd, const struct iovec *vector, int count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "readv: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_readv == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readv (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_readv (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-write (int fd, const void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "write: fd %d, count %"GF_PRI_SIZET,
- fd, count);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_write (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_write (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-ssize_t
-writev (int fd, const struct iovec *vector, int count)
-{
- int ret = 0;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "writev: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_writev == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_writev (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_writev (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite (int fd, const void *buf, size_t count, unsigned long offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite: fd %d, count %"GF_PRI_SIZET
- ", offset %lu", fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite64 (int fd, const void *buf, size_t count, uint64_t offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite64: fd %d, count %"GF_PRI_SIZET
- ", offset %"PRIu64, fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-close (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "close: fd %d", fd);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- booster_fd_put (booster_fdtable, fd);
- ret = glusterfs_close (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- ret = real_close (fd);
-
- return ret;
-}
-
-#ifndef _LSEEK_DECLARED
-#define _LSEEK_DECLARED
-off_t
-lseek (int filedes, unsigned long offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %ld",
- filedes, offset);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek (filedes, offset, whence);
- }
-
- return ret;
-}
-#endif
-
-off_t
-lseek64 (int filedes, uint64_t offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %"PRIu64,
- filedes, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek64 (filedes, offset, whence);
- }
-
- return ret;
-}
-
-int
-dup (int oldfd)
-{
- int ret = -1, new_fd = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "dup: fd %d", oldfd);
- glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_fd = real_dup (oldfd);
-
- if (new_fd >=0 && glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = booster_fd_unused_get (booster_fdtable, glfs_fd,
- new_fd);
- fd_ref ((fd_t *)glfs_fd);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR,"Failed to map new fd");
- real_close (new_fd);
- }
- }
-
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return new_fd;
-}
-
-
-int
-dup2 (int oldfd, int newfd)
-{
- int ret = -1;
- glusterfs_file_t old_glfs_fd = NULL, new_glfs_fd = NULL;
-
- if (oldfd == newfd) {
- return newfd;
- }
-
- old_glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_glfs_fd = booster_fdptr_get (booster_fdtable, newfd);
-
- ret = real_dup2 (oldfd, newfd);
- if (ret >= 0) {
- if (new_glfs_fd) {
- glusterfs_close (new_glfs_fd);
- booster_fdptr_put (new_glfs_fd);
- booster_fd_put (booster_fdtable, newfd);
- new_glfs_fd = 0;
- }
-
- if (old_glfs_fd) {
- ret = booster_fd_unused_get (booster_fdtable,
- old_glfs_fd, newfd);
- fd_ref ((fd_t *)old_glfs_fd);
- if (ret == -1) {
- real_close (newfd);
- }
- }
- }
-
- if (old_glfs_fd) {
- booster_fdptr_put (old_glfs_fd);
- }
-
- if (new_glfs_fd) {
- booster_fdptr_put (new_glfs_fd);
- }
-
- return ret;
-}
-
-int
-mkdir (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkdir: path %s", pathname);
- ret = glusterfs_mkdir (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory created");
- return ret;
- }
-
- if (real_mkdir == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else
- ret = real_mkdir (pathname, mode);
-
- return ret;
-}
-
-int
-rmdir (const char *pathname)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "rmdir: path %s", pathname);
- ret = glusterfs_rmdir (pathname);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "rmdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory removed");
- return ret;
- }
-
- if (real_rmdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rmdir (pathname);
-
- return ret;
-}
-
-int
-chmod (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chmod: path %s", pathname);
- ret = glusterfs_chmod (pathname, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chmod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chmod succeeded");
- return ret;
- }
-
- if (real_chmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chmod (pathname, mode);
-
- return ret;
-}
-
-int
-chown (const char *pathname, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chown: path: %s", pathname);
- ret = glusterfs_chown (pathname, owner, group);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chown failed: %s\n",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chown succeeded");
- return ret;
- }
-
- if (real_chown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chown (pathname, owner, group);
-
- return ret;
-}
-
-int
-fchown (int fd, uid_t owner, gid_t group)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchown: fd %d, uid %d, gid %d", fd,
- owner, group);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchown (fd, owner, group);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchown (fh, owner, group);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-
-#define MOUNT_TABLE_HASH_SIZE 256
-
-
-static void booster_cleanup (void);
-static int
-booster_init (void)
-{
- char *booster_conf_path = NULL;
- int ret = -1;
- int pipefd[2];
-
- booster_fdtable = booster_fdtable_alloc ();
- if (!booster_fdtable) {
- fprintf (stderr, "cannot allocate fdtable: %s\n",
- strerror (errno));
- goto err;
- }
-
- if (pipe (pipefd) == -1) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Pipe creation failed:%s"
- , strerror (errno));
- goto err;
- }
-
- process_piped_fd = pipefd[0];
- real_close (pipefd[1]);
- /* libglusterfsclient based VMPs should be inited only
- * after the file tables are inited so that if the socket
- * calls use the fd based syscalls, the fd tables are
- * correctly initialized to return a NULL handle, on which the
- * socket calls will fall-back to the real API.
- */
- booster_conf_path = getenv (BOOSTER_CONF_ENV_VAR);
- if (booster_conf_path != NULL) {
- if (strlen (booster_conf_path) > 0)
- ret = booster_configure (booster_conf_path);
- else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, "
- "using default path: %s", BOOSTER_CONF_ENV_VAR,
- DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
- } else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, using default"
- " path: %s", BOOSTER_CONF_ENV_VAR,DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
-
- atexit (booster_cleanup);
- if (ret == 0)
- gf_log ("booster", GF_LOG_DEBUG, "booster is inited");
- return 0;
-
-err:
- /* Sure we return an error value here
- * but who cares about booster.
- */
- return -1;
-}
-
-
-static void
-booster_cleanup (void)
-{
- /* Ideally, we should be de-initing the fd-table
- * here but the problem is that I've seen file accesses through booster
- * continuing while the atexit registered function is called. That means
- * , we cannot dealloc the fd-table since then there could be a crash
- * while trying to determine whether a given fd is for libc or for
- * libglusterfsclient.
- * We should be satisfied with having cleaned up glusterfs contexts.
- */
- glusterfs_umount_all ();
- glusterfs_reset ();
-}
-
-int
-fchmod (int fd, mode_t mode)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchmod: fd %d, mode: 0x%x", fd, mode);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchmod (fd, mode);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchmod (fh, mode);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-fsync (int fd)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsync: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsync == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fsync (fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsync (fh);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int __REDIRECT (booster_false_ftruncate, (int fd, off_t length),
- ftruncate);
-int __REDIRECT (booster_false_ftruncate64, (int fd, loff_t length),
- ftruncate64);
-
-int
-booster_false_ftruncate (int fd, off_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-booster_false_ftruncate64 (int fd, loff_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate64 (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-link (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "link: old: %s, new: %s", old, new);
- ret = glusterfs_link (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Link failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "link call succeeded");
- return ret;
- }
-
- if (real_link == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_link (old, new);
-
- return ret;
-}
-
-int
-rename (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "link: old: %s, new: %s", old, new);
- ret = glusterfs_rename (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Rename failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "Rename succeeded");
- return ret;
- }
-
- if (real_rename == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rename (old, new);
-
- return ret;
-}
-
-int
-utimes (const char *path, const struct timeval times[2])
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utimes: path %s", path);
- ret = glusterfs_utimes (path, times);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utimes failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utimes succeeded");
- return ret;
- }
-
- if (real_utimes == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utimes (path, times);
-
- return ret;
-}
-
-int
-utime (const char *path, const struct utimbuf *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utime: path %s", path);
- ret = glusterfs_utime (path, buf);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utime failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utime succeeded");
- return ret;
- }
-
- if (real_utime == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utime (path, buf);
-
- return ret;
-}
-
-int
-mknod (const char *path, mode_t mode, dev_t dev)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mknod: path %s", path);
- ret = glusterfs_mknod (path, mode, dev);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mknod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mknod succeeded");
- return ret;
- }
-
- if (real_mknod) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mknod (path, mode, dev);
-
- return ret;
-}
-
-int
-mkfifo (const char *path, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkfifo: path %s", path);
- ret = glusterfs_mkfifo (path, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkfifo failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mkfifo succeeded");
- return ret;
- }
-
- if (real_mkfifo == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mkfifo (path, mode);
-
- return ret;
-}
-
-int
-unlink (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "unlink: path %s", path);
- ret = glusterfs_unlink (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "unlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "unlink succeeded");
- return ret;
- }
-
- if (real_unlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_unlink (path);
-
- return ret;
-}
-
-int
-symlink (const char *oldpath, const char *newpath)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "symlink: old: %s, new: %s",
- oldpath, newpath);
- ret = glusterfs_symlink (oldpath, newpath);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "symlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "symlink succeeded");
- return ret;
- }
-
- if (real_symlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_symlink (oldpath, newpath);
-
- return ret;
-}
-
-int
-readlink (const char *path, char *buf, size_t bufsize)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "readlink: path %s", path);
- ret = glusterfs_readlink (path, buf, bufsize);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "readlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "readlink succeeded");
- return ret;
- }
-
- if (real_readlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readlink (path, buf, bufsize);
-
- return ret;
-}
-
-char *
-realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "realpath: path %s", path);
- res = glusterfs_realpath (path, resolved_path);
- if ((res == NULL) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "realpath failed: %s",
- strerror (errno));
- return res;
- }
-
- if (res != NULL) {
- gf_log ("booster", GF_LOG_TRACE, "realpath succeeded");
- return res;
- }
-
- if (real_realpath == NULL) {
- errno = ENOSYS;
- res = NULL;
- } else
- res = real_realpath (path, resolved_path);
-
- return res;
-}
-
-#define BOOSTER_GL_DIR 1
-#define BOOSTER_POSIX_DIR 2
-
-struct booster_dir_handle {
- int type;
- void *dirh;
-};
-
-DIR *
-opendir (const char *path)
-{
- glusterfs_dir_t gdir = NULL;
- struct booster_dir_handle *bh = NULL;
- DIR *pdir = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "opendir: path: %s", path);
- bh = calloc (1, sizeof (struct booster_dir_handle));
- if (!bh) {
- gf_log ("booster", GF_LOG_ERROR, "memory allocation failed");
- errno = ENOMEM;
- goto out;
- }
-
- gdir = glusterfs_opendir (path);
- if (gdir) {
- gf_log ("booster", GF_LOG_TRACE, "Gluster dir opened");
- bh->type = BOOSTER_GL_DIR;
- bh->dirh = (void *)gdir;
- goto out;
- } else if ((!gdir) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Opendir failed");
- goto free_out;
- }
-
- if (real_opendir == NULL) {
- errno = ENOSYS;
- goto free_out;
- }
-
- pdir = real_opendir (path);
-
- if (pdir) {
- bh->type = BOOSTER_POSIX_DIR;
- bh->dirh = (void *)pdir;
- goto out;
- }
-
-free_out:
- if (bh) {
- free (bh);
- bh = NULL;
- }
-out:
- return (DIR *)bh;
-}
-
-int __REDIRECT (booster_false_readdir_r, (DIR *dir, struct dirent *entry,
- struct dirent **result), readdir_r) __nonnull ((1));
-int __REDIRECT (booster_false_readdir64_r, (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result), readdir64_r) __nonnull ((1));
-
-int
-booster_false_readdir_r (DIR *dir, struct dirent *entry, struct dirent **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh, entry,
- result);
-
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-int
-booster_false_readdir64_r (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh,
- (struct dirent *)entry,
- (struct dirent **)result);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir64_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir64_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-struct dirent *
-__REDIRECT (booster_false_readdir, (DIR *dir), readdir) __nonnull ((1));
-
-struct dirent64 *
-__REDIRECT (booster_false_readdir64, (DIR *dir), readdir64) __nonnull ((1));
-
-struct dirent *
-booster_false_readdir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-struct dirent64 *
-booster_false_readdir64 (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent64 *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir64 ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-int
-closedir (DIR *dh)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dh;
- int ret = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on gluster");
- ret = glusterfs_closedir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on posix");
- if (real_closedir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_closedir ((DIR *)bh->dirh);
- } else {
- errno = EBADF;
- }
-
- if (ret == 0) {
- free (bh);
- bh = NULL;
- }
-out:
- return ret;
-}
-
-/* The real stat functions reside in booster_stat.c to
- * prevent clash with the statX prototype and functions
- * declared from sys/stat.h
- */
-int
-booster_xstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat succeeded");
- goto out;
- }
-
- if (real___xstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___xstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_xstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat64: path: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat64 succeeded");
- goto out;
- }
-
- if (real___xstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___xstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_stat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "stat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat succeeded");
- goto out;
- }
-
- if (real_stat != NULL)
- ret = real_stat (path, sbuf);
- else if (real___xstat != NULL)
- ret = real___xstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_stat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "stat64: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat64 succeeded");
- goto out;
- }
-
- if (real_stat64 != NULL)
- ret = real_stat64 (path, sbuf);
- else if (real___xstat64 != NULL)
- ret = real___xstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat (int ver, int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___fxstat (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat64 (int ver, int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- ret = real___fxstat64 (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat (int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat != NULL)
- ret = real_fstat (fd, sbuf);
- else if (real___fxstat != NULL)
- ret = real___fxstat (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat64 (int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat64 != NULL)
- ret = real_fstat64 (fd, sbuf);
- else if (real___fxstat64 != NULL)
- /* Not sure how portable the use of 0 for
- * version number is but it works over glibc.
- * We need this because, I've
- * observed that all the above real* functors can be
- * NULL. In that case, this is our last and only option.
- */
- ret = real___fxstat64 (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_lxstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat succeeded");
- goto out;
- }
-
- if (real___lxstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___lxstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lxstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat64 succeeded");
- goto out;
- }
-
- if (real___lxstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___lxstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lstat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat succeeded");
- goto out;
- }
-
- if (real_lstat != NULL)
- ret = real_lstat (path, sbuf);
- else if (real___lxstat != NULL)
- ret = real___lxstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_lstat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat64 succeeded");
- goto out;
- }
-
- if (real_lstat64 != NULL)
- ret = real_lstat64 (path, sbuf);
- else if (real___lxstat64 != NULL)
- ret = real___lxstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_statfs (const char *pathname, struct statfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statfs: path %s", pathname);
- ret = glusterfs_statfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs succeeded");
- goto out;
- }
-
- if (real_statfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statfs64 (const char *pathname, struct statfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "stat64: path %s", pathname);
- ret = glusterfs_statfs (pathname, (struct statfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs64 succeeded");
- goto out;
- }
-
- if (real_statfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs (const char *pathname, struct statvfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs: path %s", pathname);
- ret = glusterfs_statvfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs succeeded");
- goto out;
- }
-
- if (real_statvfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs64 (const char *pathname, struct statvfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs64: path %s", pathname);
- ret = glusterfs_statvfs (pathname, (struct statvfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs64 succeeded");
- goto out;
- }
-
- if (real_statvfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-ssize_t
-getxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "getxattr: path %s, name %s", path,
- name);
- ret = glusterfs_getxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "getxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "getxattr succeeded");
- return ret;
- }
-
- if (real_getxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_getxattr (path, name, value, size);
-out:
- return ret;
-}
-
-
-ssize_t
-lgetxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr: path %s, name %s", path,
- name);
- ret = glusterfs_lgetxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lgetxattr failed: %s",
- strerror (errno));
-
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr succeeded");
- return ret;
- }
-
- if (real_lgetxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_lgetxattr (path, name, value, size);
-out:
- return ret;
-}
-
-int
-remove (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "remove: %s", path);
- ret = glusterfs_remove (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "remove failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "remove succeeded");
- goto out;
- }
-
- if (real_remove == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_remove (path);
-
-out:
- return ret;
-}
-
-int
-lchown (const char *path, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lchown: path %s", path);
- ret = glusterfs_lchown (path, owner, group);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lchown failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_ERROR, "lchown succeeded");
- goto out;
- }
-
- if (real_lchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_lchown (path, owner, group);
-
-out:
- return ret;
-}
-
-void
-booster_rewinddir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on glusterfs");
- glusterfs_rewinddir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_rewinddir == NULL) {
- errno = ENOSYS;
- goto out;
- }
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on posix");
- real_rewinddir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-void
-booster_seekdir (DIR *dir, off_t offset)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "seekdir on glusterfs");
- glusterfs_seekdir ((glusterfs_dir_t)bh->dirh, offset);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_seekdir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "seekdir on posix");
- real_seekdir ((DIR *)bh->dirh, offset);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-off_t
-booster_telldir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- off_t offset = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "telldir on glusterfs");
- offset = glusterfs_telldir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_telldir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "telldir on posix");
- offset = real_telldir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return offset;
-}
-
-
-pid_t
-fork (void)
-{
- pid_t pid = 0;
- char child = 0;
-
- glusterfs_log_lock ();
- {
- pid = real_fork ();
- }
- glusterfs_log_unlock ();
-
- child = (pid == 0);
- if (child) {
- booster_cleanup ();
- booster_init ();
- }
-
- return pid;
-}
-
-ssize_t
-sendfile (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile: in fd %d, out fd %d, offset"
- " %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd, *offset,
- count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-ssize_t
-sendfile64 (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile64: in fd %d, out fd %d,"
- " offset %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd,
- *offset, count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile64 (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-
-int
-fcntl (int fd, int cmd, ...)
-{
- va_list ap;
- int ret = -1;
- long arg = 0;
- struct flock *lock = NULL;
- glusterfs_file_t glfs_fd = 0;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- gf_log ("booster", GF_LOG_TRACE, "fcntl: fd %d, cmd %d", fd, cmd);
- switch (cmd) {
- case F_DUPFD:
- ret = dup (fd);
- break;
- /*
- * FIXME: Consider this case when implementing F_DUPFD, F_GETFD
- * etc flags in libglusterfsclient. Commenting it out for
- * timebeing since it is defined only in linux kernel
- * versions >= 2.6.24.
- */
- /* case F_DUPFD_CLOEXEC: */
- case F_GETFD:
- if (glfs_fd != NULL) {
- ret = booster_get_close_on_exec (booster_fdtable, fd)
- ? FD_CLOEXEC : 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_GETFL:
- case F_GETOWN:
- case F_GETSIG:
- case F_GETLEASE:
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd);
- }
- break;
-
- case F_SETFD:
- if (glfs_fd != NULL) {
- booster_set_close_on_exec (booster_fdtable, fd);
- ret = 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_SETFL:
- case F_SETOWN:
- case F_SETSIG:
- case F_SETLEASE:
- case F_NOTIFY:
- va_start (ap, cmd);
- arg = va_arg (ap, long);
- va_end (ap);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, arg);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, arg);
- }
- break;
-
- case F_GETLK:
- case F_SETLK:
- case F_SETLKW:
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (lock == NULL) {
- errno = EINVAL;
- goto out;
- }
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, lock);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, lock);
- }
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-chdir (const char *path)
-{
- int ret = -1;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "chdir: path %s", path);
-
- pthread_mutex_lock (&cwdlock);
- {
- res = glusterfs_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "getcwd failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- ret = glusterfs_chdir (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chdir failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chdir succeeded");
- goto unlock;
- }
-
- if (real_chdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto unlock;
- }
-
- ret = real_chdir (path);
- if (ret == -1) {
- glusterfs_chdir (cwd);
- }
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- return ret;
-}
-
-
-int
-fchdir (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_fchdir (fd);
- if (ret == 0) {
- res = real_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR,
- "getcwd failed (%s)",
- strerror (errno));
- ret = -1;
- } else {
- glusterfs_chdir (cwd);
- }
- }
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchdir (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-char *
-getcwd (char *buf, size_t size)
-{
- char *res = NULL;
-
- res = glusterfs_getcwd (buf, size);
- if ((res == NULL) && (errno == ENODEV)) {
- res = real_getcwd (buf, size);
- }
-
- return res;
-}
-
-
-int __REDIRECT (booster_false_truncate, (const char *path, off_t length),
- truncate) __nonnull ((1));
-int __REDIRECT (booster_false_truncate64, (const char *path, loff_t length),
- truncate64) __nonnull ((1));;
-
-int
-booster_false_truncate (const char *path, off_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate: path (%s) length (%"PRIu64
- ")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate succeeded");
- goto out;
- }
-
- if (real_truncate != NULL)
- ret = real_truncate (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-booster_false_truncate64 (const char *path, loff_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate64: path (%s) length "
- "(%"PRIu64")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate64 succeeded");
- goto out;
- }
-
- if (real_truncate64 != NULL)
- ret = real_truncate64 (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-setxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "setxattr: path: %s", path);
- ret = glusterfs_setxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "setxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "setxattr succeeded");
- goto out;
- }
-
- if (real_setxattr != NULL)
- ret = real_setxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-lsetxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr: path: %s", path);
- ret = glusterfs_lsetxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lsetxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr succeeded");
- goto out;
- }
-
- if (real_lsetxattr != NULL)
- ret = real_lsetxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-fsetxattr (int fd, const char *name, const void *value, size_t size, int flags)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsetxattr: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsetxattr != NULL)
- ret = real_fsetxattr (fd, name, value, size, flags);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsetxattr (fh, name, value, size, flags);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-
-void
-booster_lib_init (void)
-{
-
- RESOLVE (open);
- RESOLVE (open64);
- RESOLVE (creat);
- RESOLVE (creat64);
-
- RESOLVE (read);
- RESOLVE (readv);
- RESOLVE (pread);
- RESOLVE (pread64);
-
- RESOLVE (write);
- RESOLVE (writev);
- RESOLVE (pwrite);
- RESOLVE (pwrite64);
-
- RESOLVE (lseek);
- RESOLVE (lseek64);
-
- RESOLVE (close);
-
- RESOLVE (dup);
- RESOLVE (dup2);
-
- RESOLVE (fork);
- RESOLVE (mkdir);
- RESOLVE (rmdir);
- RESOLVE (chmod);
- RESOLVE (chown);
- RESOLVE (fchmod);
- RESOLVE (fchown);
- RESOLVE (fsync);
- RESOLVE (ftruncate);
- RESOLVE (ftruncate64);
- RESOLVE (link);
- RESOLVE (rename);
- RESOLVE (utimes);
- RESOLVE (utime);
- RESOLVE (mknod);
- RESOLVE (mkfifo);
- RESOLVE (unlink);
- RESOLVE (symlink);
- RESOLVE (readlink);
- RESOLVE (realpath);
- RESOLVE (opendir);
- RESOLVE (readdir);
- RESOLVE (readdir64);
- RESOLVE (closedir);
- RESOLVE (__xstat);
- RESOLVE (__xstat64);
- RESOLVE (stat);
- RESOLVE (stat64);
- RESOLVE (__fxstat);
- RESOLVE (__fxstat64);
- RESOLVE (fstat);
- RESOLVE (fstat64);
- RESOLVE (__lxstat);
- RESOLVE (__lxstat64);
- RESOLVE (lstat);
- RESOLVE (lstat64);
- RESOLVE (statfs);
- RESOLVE (statfs64);
- RESOLVE (statvfs);
- RESOLVE (statvfs64);
- RESOLVE (getxattr);
- RESOLVE (lgetxattr);
- RESOLVE (remove);
- RESOLVE (lchown);
- RESOLVE (rewinddir);
- RESOLVE (seekdir);
- RESOLVE (telldir);
- RESOLVE (sendfile);
- RESOLVE (sendfile64);
- RESOLVE (readdir_r);
- RESOLVE (readdir64_r);
- RESOLVE (fcntl);
- RESOLVE (chdir);
- RESOLVE (fchdir);
- RESOLVE (getcwd);
- RESOLVE (truncate);
- RESOLVE (truncate64);
- RESOLVE (setxattr);
- RESOLVE (lsetxattr);
- RESOLVE (fsetxattr);
-
- /* This must be called after resolving real functions
- * above so that the socket based IO calls in libglusterfsclient
- * can fall back to a non-NULL real_XXX function pointer.
- * Calling booster_init before resolving the names above
- * results in seg-faults because the function symbols above are NULL.
- */
- booster_init ();
-}
-
diff --git a/booster/src/booster_fstab.c b/booster/src/booster_fstab.c
deleted file mode 100644
index 202249cad..000000000
--- a/booster/src/booster_fstab.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995-2000, 2001, 2002, 2003, 2006
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <alloca.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include "booster_fstab.h"
-#include <stdlib.h>
-#include <libglusterfsclient.h>
-#include <errno.h>
-
-/* The default timeout for inode and stat cache. */
-#define BOOSTER_DEFAULT_ATTR_TIMEO 5 /* In Secs */
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-glusterfs_fstab_t *
-glusterfs_fstab_init (const char *file, const char *mode)
-{
- glusterfs_fstab_t *handle = NULL;
- handle = calloc (1, sizeof (glusterfs_fstab_t));
- if (!handle) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Memory allocation"
- " failed");
- goto out;
- }
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "FSTAB file: %s", file);
- FILE *result = fopen (file,mode);
- if (result != NULL) {
- handle->fp = result;
- } else {
- gf_log ("booster-fstab", GF_LOG_ERROR, "FSTAB file open failed:"
- " %s", strerror (errno));
- free (handle);
- handle = NULL;
- }
-
-out:
-
- return handle;
-}
-
-int
-glusterfs_fstab_close (glusterfs_fstab_t *h)
-{
- if (!h)
- return -1;
-
- if (h->fp)
- fclose (h->fp);
-
- return 0;
-}
-
-/* Since the values in a line are separated by spaces, a name cannot
- contain a space. Therefore some programs encode spaces in names
- by the strings "\040". We undo the encoding when reading an entry.
- The decoding happens in place. */
-static char *
-decode_name (char *buf)
-{
- char *rp = buf;
- char *wp = buf;
-
- do
- if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '4'
- && rp[3] == '0')
- {
- /* \040 is a SPACE. */
- *wp++ = ' ';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '1')
- {
- /* \011 is a TAB. */
- *wp++ = '\t';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '2')
- {
- /* \012 is a NEWLINE. */
- *wp++ = '\n';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '\\')
- {
- /* We have to escape \\ to be able to represent all
- * characters. */
- *wp++ = '\\';
- rp += 1;
- }
- else if (rp[0] == '\\' && rp[1] == '1' && rp[2] == '3'
- && rp[3] == '4')
- {
- /* \134 is also \\. */
- *wp++ = '\\';
- rp += 3;
- }
- else
- *wp++ = *rp;
- while (*rp++ != '\0');
-
- return buf;
-}
-
-
-/* Read one mount table entry from STREAM. Returns a pointer to storage
- reused on the next call, or null for EOF or error (use feof/ferror to
- check). */
-struct glusterfs_mntent *
-__glusterfs_fstab_getent (FILE *stream, struct glusterfs_mntent *mp,
- char *buffer, int bufsiz)
-{
- char *cp;
- char *head;
-
- do
- {
- char *end_ptr;
-
- if (fgets (buffer, bufsiz, stream) == NULL)
- {
- return NULL;
- }
-
- end_ptr = strchr (buffer, '\n');
- if (end_ptr != NULL) /* chop newline */
- *end_ptr = '\0';
- else
- {
- /* Not the whole line was read. Do it now but forget
- * it. */
- char tmp[1024];
- while (fgets (tmp, sizeof tmp, stream) != NULL)
- if (strchr (tmp, '\n') != NULL)
- break;
- }
-
- head = buffer + strspn (buffer, " \t");
- /* skip empty lines and comment lines: */
- }
- while (head[0] == '\0' || head[0] == '#');
-
- cp = strsep (&head, " \t");
- mp->mnt_fsname = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_dir = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_type = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_opts = cp != NULL ? decode_name (cp) : (char *) "";
- switch (head ? sscanf (head, " %d %d ", &mp->mnt_freq,
- &mp->mnt_passno) : 0)
- {
- case 0:
- mp->mnt_freq = 0;
- case 1:
- mp->mnt_passno = 0;
- case 2:
- break;
- }
-
- return mp;
-}
-
-struct glusterfs_mntent *
-glusterfs_fstab_getent (glusterfs_fstab_t *h)
-{
- if (!h)
- return NULL;
-
- if (!h->fp)
- return NULL;
-
- return __glusterfs_fstab_getent (h->fp, &h->tmpent, h->buf,
- GF_MNTENT_BUFSIZE);
-}
-
-/* We have to use an encoding for names if they contain spaces or tabs.
- To be able to represent all characters we also have to escape the
- backslash itself. This "function" must be a macro since we use
- `alloca'. */
-#define encode_name(name) \
- do { \
- const char *rp = name; \
- \
- while (*rp != '\0') \
- if (*rp == ' ' || *rp == '\t' || *rp == '\\') \
- break; \
- else \
- ++rp; \
- \
- if (*rp != '\0') \
- { \
- /* In the worst case the length of the string \
- * can increase to four times the current \
- * length. */ \
- char *wp; \
- \
- rp = name; \
- name = wp = (char *) alloca (strlen (name) * 4 + 1); \
- \
- do { \
- if (*rp == ' ') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '4'; \
- *wp++ = '0'; \
- } \
- else if (*rp == '\t') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '1'; \
- } \
- else if (*rp == '\n') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '2'; \
- } \
- else if (*rp == '\\') \
- { \
- *wp++ = '\\'; \
- *wp++ = '\\'; \
- } \
- else \
- *wp++ = *rp; \
- } while (*rp++ != '\0'); \
- } \
- } while (0) \
-
-
-int
-glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt)
-{
- struct glusterfs_mntent mntcopy = *mnt;
- if (!h)
- return -1;
-
- if (!h->fp)
- return -1;
-
- if (fseek (h->fp, 0, SEEK_END))
- return -1;
-
- /* Encode spaces and tabs in the names. */
- encode_name (mntcopy.mnt_fsname);
- encode_name (mntcopy.mnt_dir);
- encode_name (mntcopy.mnt_type);
- encode_name (mntcopy.mnt_opts);
-
- return (fprintf (h->fp, "%s %s %s %s %d %d\n",
- mntcopy.mnt_fsname,
- mntcopy.mnt_dir,
- mntcopy.mnt_type,
- mntcopy.mnt_opts,
- mntcopy.mnt_freq,
- mntcopy.mnt_passno)
- < 0 ? 1 : 0);
-}
-
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-char *
-glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt, const char *opt)
-{
- const size_t optlen = strlen (opt);
- char *rest = mnt->mnt_opts, *p;
-
- while ((p = strstr (rest, opt)) != NULL)
- {
- if ((p == rest || p[-1] == ',')
- && (p[optlen] == '\0' || p[optlen] == '=' || p[optlen] == ','))
- return p;
-
- rest = strchr (p, ',');
- if (rest == NULL)
- break;
- ++rest;
- }
-
- return NULL;
-}
-
-void
-clean_init_params (glusterfs_init_params_t *ipars)
-{
- if (!ipars)
- return;
-
- if (ipars->volume_name)
- free (ipars->volume_name);
-
- if (ipars->specfile)
- free (ipars->specfile);
-
- if (ipars->logfile)
- free (ipars->logfile);
-
- if (ipars->loglevel)
- free (ipars->loglevel);
-
- return;
-}
-
-char *
-get_option_value (char *opt)
-{
- char *val = NULL;
- char *saveptr = NULL;
- char *copy_opt = NULL;
- char *retval = NULL;
-
- copy_opt = strdup (opt);
-
- /* Get the = before the value of the option. */
- val = index (copy_opt, '=');
- if (val) {
- /* Move to start of option */
- ++val;
-
- /* Now, to create a '\0' delimited string out of the
- * options string, first get the position where the
- * next option starts, that would be the next ','.
- */
- saveptr = index (val, ',');
- if (saveptr)
- *saveptr = '\0';
- retval = strdup (val);
- }
-
- free (copy_opt);
-
- return retval;
-}
-
-void
-booster_mount (struct glusterfs_mntent *ent)
-{
- char *opt = NULL;
- glusterfs_init_params_t ipars;
- time_t timeout = BOOSTER_DEFAULT_ATTR_TIMEO;
- char *timeostr = NULL;
- char *endptr = NULL;
-
- if (!ent)
- return;
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "Mount entry: volfile: %s,"
- " VMP: %s, Type: %s, Options: %s", ent->mnt_fsname,
- ent->mnt_dir, ent->mnt_type, ent->mnt_opts);
- if ((strcmp (ent->mnt_type, "glusterfs") != 0)) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Type is not glusterfs");
- return;
- }
-
- memset (&ipars, 0, sizeof (glusterfs_init_params_t));
- if (ent->mnt_fsname)
- ipars.specfile = strdup (ent->mnt_fsname);
-
- opt = glusterfs_fstab_hasoption (ent, "subvolume");
- if (opt)
- ipars.volume_name = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-file");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "logfile");
-
- if (opt)
- ipars.logfile = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-level");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "loglevel");
-
- if (opt)
- ipars.loglevel = get_option_value (opt);
-
- /* Attribute cache timeout */
- opt = glusterfs_fstab_hasoption (ent, "attr_timeout");
- if (opt) {
- timeostr = get_option_value (opt);
- if (timeostr)
- timeout = strtol (timeostr, &endptr, 10);
- }
-
- ipars.lookup_timeout = timeout;
- ipars.stat_timeout = timeout;
-
- if ((glusterfs_mount (ent->mnt_dir, &ipars)) == -1)
- gf_log ("booster-fstab", GF_LOG_ERROR, "VMP mounting failed");
-
- clean_init_params (&ipars);
-}
-
-int
-booster_configure (char *confpath)
-{
- int ret = -1;
- glusterfs_fstab_t *handle = NULL;
- struct glusterfs_mntent *ent = NULL;
-
- if (!confpath)
- goto out;
-
- handle = glusterfs_fstab_init (confpath, "r");
- if (!handle)
- goto out;
-
- while ((ent = glusterfs_fstab_getent (handle)) != NULL)
- booster_mount (ent);
-
- glusterfs_fstab_close (handle);
- ret = 0;
-out:
- return ret;
-}
-
-
diff --git a/booster/src/booster_fstab.h b/booster/src/booster_fstab.h
deleted file mode 100644
index 9bab04c5a..000000000
--- a/booster/src/booster_fstab.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef GLUSTERFS_FSTAB_MNTENT_H
-#define GLUSTERFS_FSTAB_MNTENT_H 1
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat.h"
-
-/* General filesystem types. */
-#define GF_MNTTYPE_IGNORE "ignore" /* Ignore this entry. */
-#define GF_MNTTYPE_NFS "nfs" /* Network file system. */
-#define GF_MNTTYPE_SWAP "swap" /* Swap device. */
-
-
-/* Generic mount options. */
-#define GF_MNTOPT_DEFAULTS "defaults" /* Use all default options. */
-#define GF_MNTOPT_RO "ro" /* Read only. */
-#define GF_MNTOPT_RW "rw" /* Read/write. */
-#define GF_MNTOPT_SUID "suid" /* Set uid allowed. */
-#define GF_MNTOPT_NOSUID "nosuid" /* No set uid allowed. */
-#define GF_MNTOPT_NOAUTO "noauto" /* Do not auto mount. */
-
-
-/* Structure describing a mount table entry. */
-struct glusterfs_mntent
-{
- char *mnt_fsname; /* Device or server for filesystem. */
- char *mnt_dir; /* Directory mounted on. */
- char *mnt_type; /* Type of filesystem: ufs, nfs, etc. */
- char *mnt_opts; /* Comma-separated options for fs. */
- int mnt_freq; /* Dump frequency (in days). */
- int mnt_passno; /* Pass number for `fsck'. */
-};
-
-#define GF_MNTENT_BUFSIZE 1024
-typedef struct glusterfs_fstab_handle {
- FILE *fp;
- char buf[GF_MNTENT_BUFSIZE];
- struct glusterfs_mntent tmpent;
-}glusterfs_fstab_t;
-
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-extern glusterfs_fstab_t *glusterfs_fstab_init (const char *file,
- const char *mode);
-
-extern struct glusterfs_mntent *glusterfs_fstab_getent (glusterfs_fstab_t *h);
-
-/* Write the mount table entry described by MNT to STREAM.
- Return zero on success, nonzero on failure. */
-extern int glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt);
-
-/* Close a stream opened with `glusterfs_fstab_init'. */
-extern int glusterfs_fstab_close (glusterfs_fstab_t *h);
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-extern char *glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt,
- const char *opt);
-
-#endif
diff --git a/booster/src/booster_stat.c b/booster/src/booster_stat.c
deleted file mode 100644
index 8f76cfe37..000000000
--- a/booster/src/booster_stat.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/types.h>
-
-extern int
-booster_stat (const char *path, void *buf);
-
-extern int
-booster_stat64 (const char *path, void *buf);
-
-extern int
-booster_xstat (int ver, const char *path, void *buf);
-
-extern int
-booster_xstat64 (int ver, const char *path, void *buf);
-
-extern int
-booster_fxstat (int ver, int fd, void *buf);
-extern int
-booster_fxstat64 (int ver, int fd, void *buf);
-extern int
-booster_fstat (int fd, void *buf);
-extern int
-booster_fstat64 (int fd, void *buf);
-
-extern int
-booster_lstat (const char *path, void *buf);
-extern int
-booster_lstat64 (const char *path, void *buf);
-extern int
-booster_lxstat (int ver, const char *path, void *buf);
-extern int
-booster_lxstat64 (int ver, const char *path, void *buf);
-
-
-extern int
-booster_statfs (const char *path, void *buf);
-extern int
-booster_statfs64 (const char *path, void *buf);
-
-extern int
-booster_statvfs (const char *path, void *buf);
-
-extern int
-booster_statvfs64 (const char *path, void *buf);
-
-extern void *
-booster_readdir (void *dir);
-
-extern void
-booster_rewinddir (void *dir);
-
-extern void
-booster_seekdir (void *dir, off_t offset);
-
-extern off_t
-booster_telldir (void *dir);
-
-int
-stat (const char *path, void *buf)
-{
- return booster_stat (path, buf);
-}
-
-int
-stat64 (const char *path, void *buf)
-{
- return booster_stat64 (path, buf);
-}
-
-int
-__xstat (int ver, const char *path, void *buf)
-{
- return booster_xstat (ver, path, buf);
-}
-
-int
-__xstat64 (int ver, const char *path, void *buf)
-{
- return booster_xstat64 (ver, path, buf);
-}
-
-int
-__fxstat (int ver, int fd, void *buf)
-{
- return booster_fxstat (ver, fd, buf);
-}
-
-int
-__fxstat64 (int ver, int fd, void *buf)
-{
- return booster_fxstat64 (ver, fd, buf);
-}
-
-int
-fstat (int fd, void *buf)
-{
- return booster_fstat (fd, buf);
-}
-
-int
-fstat64 (int fd, void *buf)
-{
- return booster_fstat64 (fd, buf);
-}
-
-int
-lstat (const char *path, void *buf)
-{
- return booster_lstat (path, buf);
-}
-
-int
-lstat64 (const char *path, void *buf)
-{
- return booster_lstat64 (path, buf);
-}
-
-int
-__lxstat (int ver, const char *path, void *buf)
-{
- return booster_lxstat (ver, path, buf);
-}
-
-int
-__lxstat64 (int ver, const char *path, void *buf)
-{
- return booster_lxstat64 (ver, path, buf);
-}
-
-int
-statfs (const char *pathname, void *buf)
-{
- return booster_statfs (pathname, buf);
-}
-
-int
-statfs64 (const char *pathname, void *buf)
-{
- return booster_statfs64 (pathname, buf);
-}
-
-int
-statvfs (const char *pathname, void *buf)
-{
- return booster_statvfs (pathname, buf);
-}
-
-int
-statvfs64 (const char *pathname, void *buf)
-{
- return booster_statvfs64 (pathname, buf);
-}
-
-void
-rewinddir (void *dir)
-{
- return booster_rewinddir (dir);
-}
-
-void
-seekdir (void *dir, off_t offset)
-{
- return booster_seekdir (dir, offset);
-}
-
-off_t
-telldir (void *dir)
-{
- return booster_telldir (dir);
-}
diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am
index 800283618..216d1bb55 100644
--- a/cli/src/Makefile.am
+++ b/cli/src/Makefile.am
@@ -2,23 +2,26 @@ sbin_PROGRAMS = gluster
gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c \
cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\
- cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c
+ cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c cli-cmd-snapshot.c
gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
$(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(GF_GLUSTERFS_LIBS) $(XML_LIBS)
-gluster_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS) $(LIBXML2_LIBS)
+gluster_LDFLAGS = $(GF_LDFLAGS)
noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
-I$(top_srcdir)/rpc/xdr/src\
-DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS)\
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
-DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
-DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"\
- $(LIBXML2_CFLAGS)
+ $(XML_CPPFLAGS)
+
+AM_CFLAGS = -Wall $(GF_GLUSTERFS_CFLAGS)
CLEANFILES =
diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
index 66d755fc2..566d7c978 100644
--- a/cli/src/cli-cmd-misc.c
+++ b/cli/src/cli-cmd-misc.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -41,6 +31,8 @@ extern struct cli_cmd volume_cmds[];
extern struct cli_cmd cli_probe_cmds[];
extern struct cli_cmd cli_log_cmds[];
extern struct cli_cmd cli_system_cmds[];
+extern struct cli_cmd cli_bd_cmds[];
+extern struct cli_cmd snapshot_cmds[];
struct cli_cmd cli_misc_cmds[];
int
@@ -55,7 +47,8 @@ cli_cmd_display_help (struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
struct cli_cmd *cmd[] = {volume_cmds, cli_probe_cmds,
- cli_misc_cmds, NULL};
+ cli_misc_cmds, snapshot_cmds,
+ NULL};
struct cli_cmd *cmd_ind = NULL;
int i = 0;
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 02eb2c369..5ab208b8f 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -36,6 +26,38 @@
#include "protocol-common.h"
#include "cli1-xdr.h"
+#define MAX_SNAP_DESCRIPTION_LEN 1024
+
+struct snap_config_opt_vals_ snap_confopt_vals[] = {
+ {.op_name = "snap-max-hard-limit",
+ .question = "Changing snapshot-max-hard-limit "
+ "will lead to deletion of snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"
+ },
+ {.op_name = "snap-max-soft-limit",
+ .question = "Changing snapshot-max-soft-limit "
+ "will lead to deletion of snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"
+ },
+ {.op_name = "both",
+ .question = "Changing snapshot-max-hard-limit & "
+ "snapshot-max-soft-limit will lead to "
+ "deletion of snapshots if they exceed "
+ "the new limit.\nDo you want to continue?"
+ },
+ {.op_name = NULL,
+ }
+};
+
+enum cli_snap_config_set_types {
+ GF_SNAP_CONFIG_SET_HARD = 0,
+ GF_SNAP_CONFIG_SET_SOFT = 1,
+ GF_SNAP_CONFIG_SET_BOTH = 2,
+};
+typedef enum cli_snap_config_set_types cli_snap_config_set_types;
+
static const char *
id_sel (void *wcon)
{
@@ -107,9 +129,11 @@ cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
}
if (!(strcmp (host_name, "localhost") &&
- strcmp (host_name, "127.0.0.1"))) {
+ strcmp (host_name, "127.0.0.1") &&
+ strncmp (host_name, "0.", 2))) {
cli_err ("Please provide a valid hostname/ip other "
- "than localhost or 127.0.0.1");
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
ret = -1;
GF_FREE (tmp_host);
goto out;
@@ -148,8 +172,7 @@ cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
if (!*bricks)
ret = -1;
out:
- if (free_list_ptr)
- GF_FREE (free_list_ptr);
+ GF_FREE (free_list_ptr);
return ret;
}
@@ -169,10 +192,18 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
char *bricks = NULL;
int32_t brick_count = 0;
char *opwords[] = { "replica", "stripe", "transport", NULL };
+
+ char *invalid_volnames[] = {"volume", "type", "subvolumes", "option",
+ "end-volume", "all", "volume_not_in_ring",
+ "description", "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit", NULL};
char *w = NULL;
int op_count = 0;
int32_t replica_count = 1;
int32_t stripe_count = 1;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -194,9 +225,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
if (volname[0] == '-')
goto out;
- if (!strcmp (volname, "all")) {
- cli_err ("\"all\" cannot be the name of a volume.");
- goto out;
+ for (i = 0; invalid_volnames[i]; i++) {
+ if (!strcmp (volname, invalid_volnames[i])) {
+ cli_err ("\"%s\" cannot be the name of a volume.",
+ volname);
+ goto out;
+ }
}
if (strchr (volname, '/'))
@@ -304,7 +338,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
goto out;
}
index += 2;
- } else {
+ } else {
GF_ASSERT (!"opword mismatch");
ret = -1;
goto out;
@@ -327,7 +361,12 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
brick_index = index;
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
+ if (strcmp (words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ ret = cli_cmd_bricks_parse (words, wc, brick_index, &bricks,
&brick_count);
if (ret)
goto out;
@@ -367,7 +406,7 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
ret = dict_set_dynstr (dict, "transport", trans_type);
if (ret)
goto out;
-
+ trans_type = NULL;
ret = dict_set_dynstr (dict, "bricks", bricks);
if (ret)
@@ -377,6 +416,10 @@ cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options
if (ret)
goto out;
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
+
*options = dict;
out:
@@ -385,6 +428,9 @@ out:
if (dict)
dict_destroy (dict);
}
+
+ GF_FREE (trans_type);
+
return ret;
}
@@ -645,8 +691,108 @@ out:
return ret;
}
+static inline gf_boolean_t
+cli_is_key_spl (char *key)
+{
+ return (strcmp (key, "group") == 0);
+}
+
+#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd"
+static int
+cli_add_key_group (dict_t *dict, char *key, char *value, char **op_errstr)
+{
+ int ret = -1;
+ int opt_count = 0;
+ char iter_key[1024] = {0,};
+ char iter_val[1024] = {0,};
+ char *saveptr = NULL;
+ char *tok_key = NULL;
+ char *tok_val = NULL;
+ char *dkey = NULL;
+ char *dval = NULL;
+ char *tagpath = NULL;
+ char *buf = NULL;
+ char line[PATH_MAX + 256] = {0,};
+ char errstr[2048] = "";
+ FILE *fp = NULL;
+
+ ret = gf_asprintf (&tagpath, "%s/groups/%s",
+ GLUSTERD_DEFAULT_WORKDIR, value);
+ if (ret == -1) {
+ tagpath = NULL;
+ goto out;
+ }
+
+ fp = fopen (tagpath, "r");
+ if (!fp) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "Unable to open file '%s'."
+ " Error: %s", tagpath, strerror (errno));
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+
+ opt_count = 0;
+ buf = line;
+ while (fscanf (fp, "%s", buf) != EOF) {
+
+ opt_count++;
+ tok_key = strtok_r (line, "=", &saveptr);
+ tok_val = strtok_r (NULL, "=", &saveptr);
+ if (!tok_key || !tok_val) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "'%s' file format "
+ "not valid.", tagpath);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+
+ snprintf (iter_key, sizeof (iter_key), "key%d", opt_count);
+ dkey = gf_strdup (tok_key);
+ ret = dict_set_dynstr (dict, iter_key, dkey);
+ if (ret)
+ goto out;
+ dkey = NULL;
+
+ snprintf (iter_val, sizeof (iter_val), "value%d", opt_count);
+ dval = gf_strdup (tok_val);
+ ret = dict_set_dynstr (dict, iter_val, dval);
+ if (ret)
+ goto out;
+ dval = NULL;
+
+ }
+
+ if (!opt_count) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr), "'%s' file format "
+ "not valid.", tagpath);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "count", opt_count);
+out:
+
+ GF_FREE (tagpath);
+
+ if (ret) {
+ GF_FREE (dkey);
+ GF_FREE (dval);
+ }
+
+ if (fp)
+ fclose (fp);
+
+ return ret;
+}
+#undef GLUSTERD_DEFAULT_WORKDIR
+
int32_t
-cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options,
+ char **op_errstr)
{
dict_t *dict = NULL;
char *volname = NULL;
@@ -682,9 +828,32 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
ret = dict_set_str (dict, volname, volname);
if (ret)
goto out;
+
} else if (wordcount < 5) {
ret = -1;
goto out;
+
+ } else if (wordcount == 5 && cli_is_key_spl ((char *)words[3])) {
+ key = (char *) words[3];
+ value = (char *) words[4];
+ if ( !key || !value) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_strip_whitespace (value, strlen (value));
+ if (ret == -1)
+ goto out;
+
+ if (strlen (value) == 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_add_key_group (dict, key, value, op_errstr);
+ if (ret == 0)
+ *options = dict;
+ goto out;
}
for (i = 3; i < wordcount; i+=2) {
@@ -703,6 +872,16 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
if (ret == -1)
goto out;
+ if (strlen (value) == 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (cli_is_key_spl (key)) {
+ ret = -1;
+ goto out;
+ }
+
sprintf (str, "key%d", count);
ret = dict_set_str (dict, str, key);
if (ret)
@@ -723,10 +902,8 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
*options = dict;
out:
- if (ret) {
- if (dict)
- dict_destroy (dict);
- }
+ if (ret)
+ dict_destroy (dict);
return ret;
}
@@ -745,6 +922,8 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
int count = 1;
char *w = NULL;
int index;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -822,7 +1001,13 @@ cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
brick_index = index;
parse_bricks:
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
+
+ if (strcmp (words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ ret = cli_cmd_bricks_parse (words, wc, brick_index, &bricks,
&brick_count);
if (ret)
goto out;
@@ -836,6 +1021,10 @@ parse_bricks:
if (ret)
goto out;
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
+
*options = dict;
out:
@@ -1018,10 +1207,8 @@ out:
dict_destroy (dict);
}
- if (tmp_brick)
- GF_FREE (tmp_brick);
- if (tmp_brick1)
- GF_FREE (tmp_brick1);
+ GF_FREE (tmp_brick);
+ GF_FREE (tmp_brick1);
return ret;
}
@@ -1040,6 +1227,7 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
char *opwords[] = { "start", "commit", "pause", "abort", "status",
NULL };
char *w = NULL;
+ gf_boolean_t is_force = _gf_false;
GF_ASSERT (words);
GF_ASSERT (options);
@@ -1137,12 +1325,17 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
}
if (wordcount == (op_index + 1)) {
- if (replace_op != GF_REPLACE_OP_COMMIT) {
+ if ((replace_op != GF_REPLACE_OP_COMMIT) &&
+ (replace_op != GF_REPLACE_OP_START)) {
ret = -1;
goto out;
}
if (!strcmp ("force", words[op_index])) {
- replace_op = GF_REPLACE_OP_COMMIT_FORCE;
+ if (replace_op == GF_REPLACE_OP_COMMIT)
+ replace_op = GF_REPLACE_OP_COMMIT_FORCE;
+
+ else if (replace_op == GF_REPLACE_OP_START)
+ is_force = _gf_true;
}
}
@@ -1156,6 +1349,9 @@ cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
if (ret)
goto out;
+ ret = dict_set_int32 (dict, "force", is_force);
+ if (ret)
+ goto out;
*options = dict;
@@ -1400,22 +1596,161 @@ gsyncd_glob_check (const char *w)
return !!strpbrk (w, "*?[");
}
+static int
+config_parse (const char **words, int wordcount, dict_t *dict,
+ unsigned cmdi, unsigned glob)
+{
+ int32_t ret = -1;
+ int32_t i = -1;
+ char *append_str = NULL;
+ size_t append_len = 0;
+ char *subop = NULL;
+
+ switch ((wordcount - 1) - cmdi) {
+ case 0:
+ subop = gf_strdup ("get-all");
+ break;
+ case 1:
+ if (words[cmdi + 1][0] == '!') {
+ (words[cmdi + 1])++;
+ if (gf_asprintf (&subop, "del%s",
+ glob ? "-glob" : "") == -1)
+ subop = NULL;
+ } else
+ subop = gf_strdup ("get");
+
+ ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
+ break;
+ default:
+ if (gf_asprintf (&subop, "set%s", glob ? "-glob" : "") == -1)
+ subop = NULL;
+
+ ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
+
+ /* join the varargs by spaces to get the op_value */
+
+ for (i = cmdi + 2; i < wordcount; i++)
+ append_len += (strlen (words[i]) + 1);
+ /* trailing strcat will add two bytes, make space for that */
+ append_len++;
+
+ append_str = GF_CALLOC (1, append_len, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = cmdi + 2; i < wordcount; i++) {
+ strcat (append_str, words[i]);
+ strcat (append_str, " ");
+ }
+ append_str[append_len - 2] = '\0';
+ /* "checkpoint now" is special: we resolve that "now" */
+ if (strcmp (words[cmdi + 1], "checkpoint") == 0 &&
+ strcmp (append_str, "now") == 0) {
+ struct timeval tv = {0,};
+
+ ret = gettimeofday (&tv, NULL);
+ if (ret == -1)
+ goto out; /* FIXME: free append_str? */
+
+ GF_FREE (append_str);
+ append_str = GF_CALLOC (1, 300, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+ strcpy (append_str, "as of ");
+ gf_time_fmt (append_str + strlen ("as of "),
+ 300 - strlen ("as of "),
+ tv.tv_sec, gf_timefmt_FT);
+ }
+
+ ret = dict_set_dynstr (dict, "op_value", append_str);
+ }
+
+ ret = -1;
+ if (subop) {
+ ret = dict_set_dynstr (dict, "subop", subop);
+ if (!ret)
+ subop = NULL;
+ }
+
+out:
+ if (ret && append_str)
+ GF_FREE (append_str);
+
+ GF_FREE (subop);
+
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+static int32_t
+force_push_pem_parse (const char **words, int wordcount,
+ dict_t *dict, unsigned *cmdi)
+{
+ int32_t ret = 0;
+
+ if (!strcmp ((char *)words[wordcount-1], "force")) {
+ if ((strcmp ((char *)words[wordcount-2], "start")) &&
+ (strcmp ((char *)words[wordcount-2], "stop")) &&
+ (strcmp ((char *)words[wordcount-2], "create")) &&
+ (strcmp ((char *)words[wordcount-2], "push-pem"))) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "force",
+ _gf_true);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+
+ if (!strcmp ((char *)words[wordcount-2], "push-pem")) {
+ if (strcmp ((char *)words[wordcount-3], "create")) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "push_pem", 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ }
+ } else if (!strcmp ((char *)words[wordcount-1], "push-pem")) {
+ if (strcmp ((char *)words[wordcount-2], "create")) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32 (dict, "push_pem", 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ }
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+
int32_t
cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
{
int32_t ret = -1;
dict_t *dict = NULL;
gf1_cli_gsync_set type = GF_GSYNC_OPTION_TYPE_NONE;
- char *append_str = NULL;
- size_t append_len = 0;
- char *subop = NULL;
int i = 0;
unsigned masteri = 0;
unsigned slavei = 0;
unsigned glob = 0;
unsigned cmdi = 0;
- char *opwords[] = { "status", "start", "stop", "config",
- "log-rotate", NULL };
+ char *opwords[] = { "create", "status", "start", "stop",
+ "config", "force", "delete",
+ "push-pem", "detail", NULL };
char *w = NULL;
GF_ASSERT (words);
@@ -1427,10 +1762,11 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
/* new syntax:
*
- * volume geo-replication [$m [$s]] status
+ * volume geo-replication $m $s create [push-pem] [force]
+ * volume geo-replication [$m [$s]] status [detail]
* volume geo-replication [$m] $s config [[!]$opt [$val]]
- * volume geo-replication $m $s start|stop
- * volume geo-replication $m [$s] log-rotate
+ * volume geo-replication $m $s start|stop [force]
+ * volume geo-replication $m $s delete
*/
if (wordcount < 3)
@@ -1461,6 +1797,13 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
if (slavei == 3)
masteri = 2;
} else if (i <= 3) {
+ if (!strcmp ((char *)words[wordcount-1], "detail")) {
+ /* For status detail it is mandatory to provide
+ * both master and slave */
+ ret = -1;
+ goto out;
+ }
+
/* no $s, can only be status cmd
* (with either a single $m before it or nothing)
* -- these conditions imply that i <= 3 after
@@ -1487,7 +1830,12 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
if (!w)
goto out;
- if (strcmp (w, "status") == 0) {
+ if (strcmp (w, "create") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CREATE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp (w, "status") == 0) {
type = GF_GSYNC_OPTION_TYPE_STATUS;
if (slavei && !masteri)
@@ -1507,14 +1855,33 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
if (!masteri || !slavei)
goto out;
- } else if (strcmp(w, "log-rotate") == 0) {
- type = GF_GSYNC_OPTION_TYPE_ROTATE;
+ } else if (strcmp (w, "delete") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_DELETE;
- if (slavei && !masteri)
+ if (!masteri || !slavei)
goto out;
} else
GF_ASSERT (!"opword mismatch");
+ ret = force_push_pem_parse (words, wordcount, dict, &cmdi);
+ if (ret)
+ goto out;
+
+ if (!strcmp ((char *)words[wordcount-1], "detail")) {
+ if (strcmp ((char *)words[wordcount-2], "status")) {
+ ret = -1;
+ goto out;
+ }
+ if (!slavei || !masteri) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32 (dict, "status-detail", _gf_true);
+ if (ret)
+ goto out;
+ cmdi++;
+ }
+
if (type != GF_GSYNC_OPTION_TYPE_CONFIG &&
(cmdi < wordcount - 1 || glob))
goto out;
@@ -1523,72 +1890,27 @@ cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
ret = 0;
- if (masteri)
+ if (masteri) {
ret = dict_set_str (dict, "master", (char *)words[masteri]);
+ if (!ret)
+ ret = dict_set_str (dict, "volname",
+ (char *)words[masteri]);
+ }
if (!ret && slavei)
ret = dict_set_str (dict, "slave", (char *)words[slavei]);
if (!ret)
ret = dict_set_int32 (dict, "type", type);
- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
- switch ((wordcount - 1) - cmdi) {
- case 0:
- subop = gf_strdup ("get-all");
- break;
- case 1:
- if (words[cmdi + 1][0] == '!') {
- (words[cmdi + 1])++;
- if (gf_asprintf (&subop, "del%s", glob ? "-glob" : "") == -1)
- subop = NULL;
- } else
- subop = gf_strdup ("get");
-
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
- break;
- default:
- if (gf_asprintf (&subop, "set%s", glob ? "-glob" : "") == -1)
- subop = NULL;
-
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
-
- /* join the varargs by spaces to get the op_value */
-
- for (i = cmdi + 2; i < wordcount; i++)
- append_len += (strlen (words[i]) + 1);
- /* trailing strcat will add two bytes, make space for that */
- append_len++;
-
- append_str = GF_CALLOC (1, append_len, cli_mt_append_str);
- if (!append_str) {
- ret = -1;
- goto out;
- }
-
- for (i = cmdi + 2; i < wordcount; i++) {
- strcat (append_str, words[i]);
- strcat (append_str, " ");
- }
- append_str[append_len - 2] = '\0';
-
- ret = dict_set_dynstr (dict, "op_value", append_str);
- }
-
- if (!subop || dict_set_dynstr (dict, "subop", subop) != 0)
- ret = -1;
- }
+ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG)
+ ret = config_parse (words, wordcount, dict, cmdi, glob);
out:
if (ret) {
if (dict)
dict_destroy (dict);
- if (append_str)
- GF_FREE (append_str);
} else
*options = dict;
+
return ret;
}
@@ -1847,7 +2169,7 @@ cli_cmd_get_statusop (const char *arg)
uint32_t ret = GF_CLI_STATUS_NONE;
char *w = NULL;
char *opwords[] = {"detail", "mem", "clients", "fd",
- "inode", "callpool", NULL};
+ "inode", "callpool", "tasks", NULL};
struct {
char *opname;
uint32_t opcode;
@@ -1858,6 +2180,7 @@ cli_cmd_get_statusop (const char *arg)
{ "fd", GF_CLI_STATUS_FD },
{ "inode", GF_CLI_STATUS_INODE },
{ "callpool", GF_CLI_STATUS_CALLPOOL },
+ { "tasks", GF_CLI_STATUS_TASKS },
{ NULL }
};
@@ -1972,8 +2295,9 @@ cli_cmd_volume_status_parse (const char **words, int wordcount,
if (!strcmp (words[3], "nfs")) {
if (cmd == GF_CLI_STATUS_FD ||
- cmd == GF_CLI_STATUS_DETAIL) {
- cli_err ("Detail/FD status not available"
+ cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Detail/FD/Tasks status not available"
" for NFS Servers");
ret = -1;
goto out;
@@ -1982,14 +2306,21 @@ cli_cmd_volume_status_parse (const char **words, int wordcount,
} else if (!strcmp (words[3], "shd")){
if (cmd == GF_CLI_STATUS_FD ||
cmd == GF_CLI_STATUS_CLIENTS ||
- cmd == GF_CLI_STATUS_DETAIL) {
- cli_err ("Detail/FD/Clients status not "
+ cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Detail/FD/Clients/Tasks status not "
"available for Self-heal Daemons");
ret = -1;
goto out;
}
cmd |= GF_CLI_STATUS_SHD;
} else {
+ if (cmd == GF_CLI_STATUS_TASKS) {
+ cli_err ("Tasks status not available for "
+ "bricks");
+ ret = -1;
+ goto out;
+ }
cmd |= GF_CLI_STATUS_BRICK;
ret = dict_set_str (dict, "brick", (char *)words[3]);
}
@@ -2138,12 +2469,103 @@ out:
return ret;
}
+static int
+extract_hostname_path_from_token (const char *tmp_words, char **hostname,
+ char **path)
+{
+ int ret = 0;
+ char *delimiter = NULL;
+ char *tmp_host = NULL;
+ char *host_name = NULL;
+ char *words = NULL;
+
+ *hostname = NULL;
+ *path = NULL;
+
+ words = GF_CALLOC (1, strlen (tmp_words) + 1, gf_common_mt_char);
+ if (!words){
+ ret = -1;
+ goto out;
+ }
+
+ strncpy (words, tmp_words, strlen (tmp_words) + 1);
+
+ if (validate_brick_name (words)) {
+ cli_err ("Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>", words);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr (words, ':');
+ ret = gf_canonicalize_path (delimiter + 1);
+ if (ret) {
+ goto out;
+ } else {
+ *path = GF_CALLOC (1, strlen (delimiter+1) +1,
+ gf_common_mt_char);
+ if (!*path) {
+ ret = -1;
+ goto out;
+
+ }
+ strncpy (*path, delimiter +1,
+ strlen(delimiter + 1) + 1);
+ }
+ }
+
+ tmp_host = gf_strdup (words);
+ if (!tmp_host) {
+ gf_log ("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name (tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli",GF_LOG_ERROR, "Unable to allocate "
+ "memory");
+ goto out;
+ }
+ if (!(strcmp (host_name, "localhost") &&
+ strcmp (host_name, "127.0.0.1") &&
+ strncmp (host_name, "0.", 2))) {
+ cli_err ("Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address (host_name, _gf_false)) {
+ cli_err ("internet address '%s' does not conform to "
+ "standards", host_name);
+ ret = -1;
+ goto out;
+ }
+
+ *hostname = GF_CALLOC (1, strlen (host_name) + 1,
+ gf_common_mt_char);
+ if (!*hostname) {
+ ret = -1;
+ goto out;
+ }
+ strncpy (*hostname, host_name, strlen (host_name) + 1);
+ ret = 0;
+
+out:
+ GF_FREE (words);
+ GF_FREE (tmp_host);
+ return ret;
+}
+
+
int
cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
dict_t **options)
{
int ret = 0;
dict_t *dict = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
dict = dict_new ();
if (!dict)
@@ -2165,6 +2587,11 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
ret = dict_set_int32 (dict, "heal-op",
GF_AFR_OP_HEAL_FULL);
goto done;
+ } else if (!strcmp (words[3], "statistics")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS);
+ goto done;
+
} else if (!strcmp (words[3], "info")) {
ret = dict_set_int32 (dict, "heal-op",
GF_AFR_OP_INDEX_SUMMARY);
@@ -2175,28 +2602,66 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
}
}
if (wordcount == 5) {
- if (strcmp (words[3], "info")) {
+ if (strcmp (words[3], "info") &&
+ strcmp (words[3], "statistics")) {
ret = -1;
goto out;
}
- if (!strcmp (words[4], "healed")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEALED_FILES);
- goto done;
- }
- if (!strcmp (words[4], "heal-failed")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEAL_FAILED_FILES);
- goto done;
+
+ if (!strcmp (words[3], "info")) {
+ if (!strcmp (words[4], "healed")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEALED_FILES);
+ goto done;
+ }
+ if (!strcmp (words[4], "heal-failed")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_HEAL_FAILED_FILES);
+ goto done;
+ }
+ if (!strcmp (words[4], "split-brain")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_SPLIT_BRAIN_FILES);
+ goto done;
+ }
}
- if (!strcmp (words[4], "split-brain")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_SPLIT_BRAIN_FILES);
- goto done;
+
+ if (!strcmp (words[3], "statistics")) {
+ if (!strcmp (words[4], "heal-count")) {
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS_HEAL_COUNT);
+ goto done;
+ }
}
ret = -1;
goto out;
}
+ if (wordcount == 7) {
+ if (!strcmp (words[3], "statistics")
+ && !strcmp (words[4], "heal-count")
+ && !strcmp (words[5], "replica")) {
+
+ ret = dict_set_int32 (dict, "heal-op",
+ GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+ if (ret)
+ goto out;
+ ret = extract_hostname_path_from_token (words[6],
+ &hostname, &path);
+ if (ret)
+ goto out;
+ ret = dict_set_dynstr (dict, "per-replica-cmd-hostname",
+ hostname);
+ if (ret)
+ goto out;
+ ret = dict_set_dynstr (dict, "per-replica-cmd-path",
+ path);
+ if (ret)
+ goto out;
+ else
+ goto done;
+
+ }
+ }
ret = -1;
goto out;
done:
@@ -2209,3 +2674,1009 @@ out:
return ret;
}
+
+/* Build the request dictionary for a defrag command line.
+ *
+ * words[2] is taken as the volume name and words[3] as the sub-command.
+ * Accepted forms:
+ *   4 words : ... <volname> start | stop | status
+ *   5 words : ... <volname> fix-layout start
+ *             ... <volname> start force
+ *
+ * On success *options receives a new dict holding "volname" and
+ * "rebalance-command"; on failure the dict is destroyed here.
+ *
+ * return value : 0 on success, non-zero on failure
+ */
+int
+cli_cmd_volume_defrag_parse (const char **words, int wordcount,
+                             dict_t **options)
+{
+        dict_t             *req      = NULL;
+        int                 ret      = -1;
+        const char         *subcmd   = NULL;
+        const char         *modifier = NULL;
+        gf_cli_defrag_type  cmd      = 0;
+
+        GF_ASSERT (words);
+        GF_ASSERT (options);
+
+        req = dict_new ();
+        if (!req)
+                goto out;
+
+        /* Validate the shape of the command in a single pass */
+        switch (wordcount) {
+        case 4:
+                subcmd = words[3];
+                if (strcmp (subcmd, "start") != 0 &&
+                    strcmp (subcmd, "stop") != 0 &&
+                    strcmp (subcmd, "status") != 0)
+                        goto out;
+                break;
+
+        case 5:
+                subcmd   = words[3];
+                modifier = words[4];
+                /* only "fix-layout start" and "start force" are valid */
+                if (!(strcmp (subcmd, "fix-layout") == 0 &&
+                      strcmp (modifier, "start") == 0) &&
+                    !(strcmp (subcmd, "start") == 0 &&
+                      strcmp (modifier, "force") == 0))
+                        goto out;
+                break;
+
+        default:
+                goto out;
+        }
+
+        /* Translate the sub-command into the defrag command code */
+        if (strcmp (subcmd, "start") == 0) {
+                if (modifier && strcmp (modifier, "force") == 0)
+                        cmd = GF_DEFRAG_CMD_START_FORCE;
+                else
+                        cmd = GF_DEFRAG_CMD_START;
+        } else if (strcmp (subcmd, "fix-layout") == 0) {
+                cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
+        } else if (strcmp (subcmd, "stop") == 0) {
+                cmd = GF_DEFRAG_CMD_STOP;
+        } else if (strcmp (subcmd, "status") == 0) {
+                cmd = GF_DEFRAG_CMD_STATUS;
+        }
+
+        ret = dict_set_str (req, "volname", (char *) words[2]);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "failed to set dict");
+                goto out;
+        }
+
+        ret = dict_set_int32 (req, "rebalance-command", (int32_t) cmd);
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "failed to set dict");
+                goto out;
+        }
+
+        *options = req;
+
+out:
+        if (ret && req)
+                dict_destroy (req);
+
+        return ret;
+}
+
+/* Store the snapshot description words[desc_opt_loc] in the request
+ * dictionary under the key "description".
+ *
+ * @dict        : request dictionary that receives the description
+ * @words       : individual words of the CLI command
+ * @wordcount   : number of words (not consulted here; the caller is
+ *                expected to have validated desc_opt_loc)
+ * @desc_opt_loc: index in words of the description text
+ *
+ * Descriptions longer than MAX_SNAP_DESCRIPTION_LEN characters are cut to
+ * that length and the user is told so.  A description of exactly the
+ * maximum length is stored unmodified and produces no warning.
+ *
+ * return value : 0 on success, non-zero on failure
+ */
+int32_t
+cli_snap_create_desc_parse (dict_t *dict, const char **words,
+                            size_t wordcount, int32_t desc_opt_loc)
+{
+        int32_t         ret      = -1;
+        char           *desc     = NULL;
+        size_t          desc_len = 0;
+
+        desc = GF_CALLOC (MAX_SNAP_DESCRIPTION_LEN + 1, sizeof(char),
+                          gf_common_mt_char);
+        if (!desc) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Size of the description as given by the user, measured once */
+        desc_len = strlen (words[desc_opt_loc]);
+        if (desc_len > MAX_SNAP_DESCRIPTION_LEN) {
+                cli_out ("snapshot create: description truncated: "
+                         "Description provided is longer than 1024 characters");
+                desc_len = MAX_SNAP_DESCRIPTION_LEN;
+        }
+
+        /* desc_len never exceeds the buffer size minus one, so the copy and
+         * the terminator always fit */
+        memcpy (desc, words[desc_opt_loc], desc_len);
+        desc[desc_len] = '\0';
+
+        ret = dict_set_dynstr (dict, "description", desc);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Unable to save snap "
+                        "description");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* desc is released here only when it was not stored in dict */
+        if (ret && desc)
+                GF_FREE (desc);
+
+        return ret;
+}
+
+/* Check whether words[cur_index] already appears among
+ * words[start_index] .. words[cur_index - 1].
+ *
+ * return value : -1 if the word is repeated
+ *                 0 otherwise
+ */
+int
+cli_check_if_volname_repeated (const char **words, unsigned int start_index,
+                               uint64_t cur_index) {
+        uint64_t        pos = 0;
+
+        GF_ASSERT (words);
+
+        for (pos = start_index; pos < cur_index; pos++) {
+                if (!strcmp (words[pos], words[cur_index]))
+                        return -1;
+        }
+
+        return 0;
+}
+
+/* snapshot create <snapname> <vol-name(s)> [description <description>]
+ *                                           [force]
+ * @arg-0, dict     : Request Dictionary to be sent to server side.
+ * @arg-1, words    : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * Keys stored in dict: "snapname", "volname<N>" (N counted from 1),
+ * "volcount" and, when present on the command line, "description" and
+ * "snap-force".
+ *
+ * return value : non-zero on failure
+ *                0 on success
+ */
+int
+cli_snap_create_parse (dict_t *dict, const char **words, int wordcount) {
+        uint64_t        i               = 0;
+        int             ret             = -1;
+        uint64_t        volcount        = 0;
+        char            key[PATH_MAX]   = "";
+        char           *snapname        = NULL;
+        unsigned int    cmdi            = 2;
+        /* cmdi is command index, here cmdi is "2" (gluster snapshot create)*/
+
+        GF_ASSERT (words);
+        GF_ASSERT (dict);
+
+        if (wordcount <= cmdi + 1) {
+                cli_err ("Invalid Syntax.");
+                gf_log ("cli", GF_LOG_ERROR,
+                        "Too less words for snap create command");
+                goto out;
+        }
+
+        if (strlen(words[cmdi]) >= GLUSTERD_MAX_SNAP_NAME) {
+                cli_err ("snapshot create: failed: snapname cannot exceed "
+                         "255 characters.");
+                gf_log ("cli", GF_LOG_ERROR, "Snapname too long");
+
+                goto out;
+        }
+
+        snapname = (char *) words[cmdi];
+        for (i = 0 ; snapname[i] != '\0'; i++) {
+                /* Following volume name convention.
+                 * The cast keeps the argument of isalnum() within the
+                 * range of unsigned char, as <ctype.h> requires. */
+                if (!isalnum ((unsigned char) snapname[i]) &&
+                    (snapname[i] != '_') && (snapname[i] != '-')) {
+                        /* TODO : Is this message enough?? */
+                        cli_err ("Snapname can contain only alphanumeric, "
+                                 "\"-\" and \"_\" characters");
+                        goto out;
+                }
+        }
+
+        ret = dict_set_str (dict, "snapname", (char *)words[cmdi]);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not save snap "
+                        "name");
+                goto out;
+        }
+
+        /* Filling volume name in the dictionary */
+        for (i = cmdi + 1 ; i < wordcount
+                            && (strcmp (words[i], "description")) != 0
+                            && (strcmp (words[i], "force") != 0); i++) {
+                volcount++;
+                /* volume index starts from 1; the cast makes the argument
+                 * match the conversion specifier */
+                ret = snprintf (key, sizeof (key), "volname%lu",
+                                (unsigned long) volcount);
+                if (ret < 0) {
+                        goto out;
+                }
+
+                ret = dict_set_str (dict, key, (char *)words[i]);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Could not "
+                                "save volume name");
+                        goto out;
+                }
+
+                if (i >= cmdi + 2) {
+                        ret = -1;
+                        cli_err("Creating multiple volume snapshot is not "
+                                "supported as of now");
+                        goto out;
+                }
+                /* TODO : remove this above condition check once
+                 * multiple volume snapshot is supported */
+        }
+
+        if (volcount == 0) {
+                ret = -1;
+                cli_err ("Please provide the volume name");
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, "volcount", volcount);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not save volcount");
+                goto out;
+        }
+
+        /* Verify how we got out of "for" loop,
+         * if it is by reaching wordcount limit then goto "out",
+         * because we need not parse for "description" and "force"
+         * after this.  ret is 0 here from the volcount insert.
+         */
+        if (i == wordcount) {
+                goto out;
+        }
+
+        if ((strcmp (words[i], "description")) == 0) {
+                ++i;
+                if (i > (wordcount - 1)) {
+                        ret = -1;
+                        cli_err ("Please provide a description");
+                        gf_log ("cli", GF_LOG_ERROR,
+                                "Description not provided");
+                        goto out;
+                }
+
+                ret = cli_snap_create_desc_parse(dict, words, wordcount, i);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Could not save snap "
+                                "description");
+                        goto out;
+                }
+
+                /* Description was the last word: done (ret is 0) */
+                if ( i == (wordcount - 1))
+                        goto out;
+                i++;
+                /* point the index to next word.
+                 * As description might be followed by force option.
+                 */
+        }
+
+        if ((strcmp (words[i], "force") != 0)) {
+                ret = -1;
+                cli_err ("Invalid Syntax.");
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                goto out;
+        }
+        ret = dict_set_int8 (dict, "snap-force", 1);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not save "
+                        "snap force option");
+                goto out;
+        }
+
+        /* Check if the command has anything after "force" keyword */
+        if (++i < wordcount) {
+                ret = -1;
+                /* report it to the user like every other syntax error */
+                cli_err ("Invalid Syntax.");
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                goto out;
+        }
+
+        ret = 0;
+
+out :
+        return ret;
+}
+
+/* snapshot list [volname]
+ * @arg-0, dict     : Request Dictionary to be sent to server side.
+ * @arg-1, words    : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * With two words nothing is added to dict; with three words words[2] is
+ * stored under "volname".  Any other word count is rejected.
+ *
+ * return value : non-zero on failure
+ *                0 on success
+ */
+int
+cli_snap_list_parse (dict_t *dict, const char **words, int wordcount) {
+        int             ret = -1;
+
+        GF_ASSERT (words);
+        GF_ASSERT (dict);
+
+        switch (wordcount) {
+        case 2:
+                /* no volume filter given */
+                return 0;
+
+        case 3:
+                ret = dict_set_str (dict, "volname", (char *)words[2]);
+                if (ret)
+                        gf_log ("cli", GF_LOG_ERROR,
+                                "Failed to save volname in dictionary");
+                return ret;
+
+        default:
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                return ret;
+        }
+}
+
+/* snapshot info [(snapname | volume <volname>)]
+ * @arg-0, dict     : Request Dictionary to be sent to server side.
+ * @arg-1, words    : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * On success dict receives "cmd" (one of the GF_SNAP_INFO_TYPE_* values)
+ * plus "snapname" or "volname" when one was given.
+ *
+ * return value : non-zero on failure
+ *                0 on success
+ */
+int
+cli_snap_info_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+        int             ret             =       -1;
+        int32_t         cmd             =       GF_SNAP_INFO_TYPE_ALL;
+        unsigned int    cmdi            =       2;
+        /* cmdi is command index, here cmdi is "2" (gluster snapshot info)*/
+
+        GF_ASSERT (words);
+        GF_ASSERT (dict);
+
+        if (wordcount > 4 || wordcount < cmdi) {
+                gf_log ("cli", GF_LOG_ERROR, "Invalid syntax");
+                goto out;
+        }
+
+        /* Bare "snapshot info": report on everything */
+        if (wordcount == cmdi) {
+                ret = 0;
+                goto out;
+        }
+
+        /* If 3rd word is not "volume", then it must
+         * be snapname.
+         */
+        if (strcmp (words[cmdi], "volume") != 0) {
+                ret = dict_set_str (dict, "snapname",
+                                    (char *)words[cmdi]);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Unable to save "
+                                "snapname %s", words[cmdi]);
+                        goto out;
+                }
+
+                /* Once snap name is parsed, if we encounter any other
+                 * word then fail it. Invalid Syntax.
+                 * example : snapshot info <snapname> word
+                 */
+                if ((cmdi + 1) != wordcount) {
+                        ret = -1;
+                        gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                        goto out;
+                }
+
+                cmd = GF_SNAP_INFO_TYPE_SNAP;
+                ret = 0;
+                goto out;
+                /* No need to continue the parsing once we
+                 * get the snapname
+                 */
+        }
+
+        /* If 3rd word is "volume", then check if next word
+         * is present. As, "snapshot info volume" is an
+         * invalid command.
+         */
+        if ((cmdi + 1) == wordcount) {
+                ret = -1;
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "volname", (char *)words[wordcount - 1]);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not save "
+                        "volume name %s", words[wordcount - 1]);
+                goto out;
+        }
+        cmd = GF_SNAP_INFO_TYPE_VOL;
+out :
+        /* The info type is recorded only when parsing succeeded */
+        if (ret == 0) {
+                ret = dict_set_int32 (dict, "cmd", cmd);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Could not save "
+                                "type of snapshot info");
+                }
+        }
+        return ret;
+}
+
+
+
+/* snapshot restore <snapname>
+ * @arg-0, dict     : Request Dictionary to be sent to server side.
+ * @arg-1, words    : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * Exactly three words are required; words[2] is stored under "snapname".
+ *
+ * return value : non-zero on failure
+ *                0 on success
+ */
+int
+cli_snap_restore_parse (dict_t *dict, const char **words, int wordcount)
+{
+        int             ret = -1;
+
+        GF_ASSERT (words);
+        GF_ASSERT (dict);
+
+        if (wordcount != 3) {
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                return ret;
+        }
+
+        ret = dict_set_str (dict, "snapname", (char *)words[2]);
+        if (ret)
+                gf_log ("cli", GF_LOG_ERROR, "Unable to save snap-name %s",
+                        words[2]);
+
+        return ret;
+}
+
+/* snapshot delete <snapname>
+ * @arg-0, dict     : Request Dictionary to be sent to server side.
+ * @arg-1, words    : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ * @arg-3, state    : CLI state handed to the confirmation prompt.
+ *
+ * words[2] is stored under "snapname" before the user is asked to confirm.
+ *
+ * return value : -1 (or another non-zero dict error) on failure
+ *                 0 on success
+ *                 1 if user cancel the operation
+ */
+int
+cli_snap_delete_parse (dict_t *dict, const char **words, int wordcount,
+                       struct cli_state *state) {
+
+        int              ret    = -1;
+        const char      *prompt = "Deleting snap will erase all the "
+                                  "information about "
+                                  "the snap. Do you still want to continue?";
+
+        GF_ASSERT (words);
+        GF_ASSERT (dict);
+
+        if (wordcount != 3) {
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                return ret;
+        }
+
+        ret = dict_set_str (dict, "snapname", (char *)words[2]);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Unable to save snapname %s",
+                        words[2]);
+                return ret;
+        }
+
+        if (cli_cmd_get_confirmation (state, prompt) == GF_ANSWER_NO) {
+                ret = 1;
+                gf_log ("cli", GF_LOG_DEBUG, "User cancelled "
+                        "snapshot delete operation");
+        }
+
+        return ret;
+}
+
+/* snapshot status [(snapname | volume <volname>)]
+ * @arg-0, dict     : Request Dictionary to be sent to server side.
+ * @arg-1, words    : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * On success dict receives "cmd" (one of the GF_SNAP_STATUS_TYPE_* values)
+ * plus "snapname" or "volname" when one was given.
+ *
+ * return value : non-zero on failure
+ *                0 on success
+ */
+int
+cli_snap_status_parse (dict_t *dict, const char **words, int wordcount)
+{
+
+        int             ret  = -1;
+        int32_t         cmd  = GF_SNAP_STATUS_TYPE_ALL;
+        unsigned int    cmdi = 2;
+        /* cmdi is command index, here cmdi is "2" (gluster snapshot status)*/
+
+        GF_ASSERT (words);
+        GF_ASSERT (dict);
+
+        if (wordcount > 4 || wordcount < cmdi) {
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                goto out;
+        }
+
+        /* Bare "snapshot status": report on everything */
+        if (wordcount == cmdi) {
+                ret = 0;
+                goto out;
+        }
+
+        /* if 3rd word is not "volume", then it must be "snapname"
+        */
+        if (strcmp (words[cmdi], "volume") != 0) {
+                ret = dict_set_str (dict, "snapname",
+                                    (char *)words[cmdi]);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Could not save "
+                                "snap name %s", words[cmdi]);
+                        goto out;
+                }
+
+                /* Nothing may follow the snapname */
+                if ((cmdi + 1) != wordcount) {
+                        ret = -1;
+                        gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                        goto out;
+                }
+
+                ret = 0;
+                cmd = GF_SNAP_STATUS_TYPE_SNAP;
+                goto out;
+        }
+
+        /* If 3rd word is "volume", then check if next word is present.
+         * As, "snapshot status volume" is an invalid command
+         */
+        if ((cmdi + 1) == wordcount) {
+                ret = -1;
+                gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "volname", (char *)words [wordcount - 1]);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not save "
+                        "volume name %s", words[wordcount - 1]);
+                goto out;
+        }
+        cmd = GF_SNAP_STATUS_TYPE_VOL;
+
+out :
+        /* The status type is recorded only when parsing succeeded */
+        if (ret == 0) {
+                ret = dict_set_int32 (dict, "cmd", cmd);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Could not save cmd "
+                                "of snapshot status");
+                }
+        }
+        return ret;
+}
+
+
+/* Parse the numeric value of a snapshot config limit and store it in dict.
+ *
+ * @words    : individual words of the CLI command
+ * @dict     : request dictionary that receives the limit
+ * @wordcount: number of words in the command
+ * @index    : position in words where the value is expected
+ * @key      : dictionary key (and option name used in messages)
+ *
+ * The value must be a complete integer literal (strtol base 0, so decimal,
+ * octal and hex forms are accepted), greater than 0 and representable as
+ * int32_t.  Input with trailing characters such as "12abc" is rejected
+ * instead of being read as 12.
+ *
+ * return value : 0 on success, non-zero on failure
+ */
+int32_t
+cli_snap_config_limit_parse (const char **words, dict_t *dict,
+                             unsigned int wordcount, unsigned int index,
+                             char *key)
+{
+        int             ret   = -1;
+        long            limit = 0;
+        char           *end   = NULL;
+
+        GF_ASSERT (words);
+        GF_ASSERT (dict);
+        GF_ASSERT (key);
+
+        if (index >= wordcount) {
+                ret = -1;
+                cli_err ("Please provide a value for %s.",key);
+                gf_log ("cli", GF_LOG_ERROR, "Value not provided for %s", key);
+                goto out;
+        }
+
+        limit = strtol (words[index], &end, 0);
+        /* end == words[index]: no digits at all; *end != '\0': junk after
+         * the number */
+        if (end == words[index] || *end != '\0') {
+                ret = -1;
+                cli_err ("Invalid value %s for %s.", words[index], key);
+                gf_log ("cli", GF_LOG_ERROR, "Invalid value %s for %s",
+                        words[index], key);
+                goto out;
+        }
+
+        if (limit <= 0) {
+                ret = -1;
+                cli_err ("%s should be greater than 0.", key);
+                goto out;
+        }
+
+        /* The value is stored as int32; refuse anything that would be
+         * silently narrowed */
+        if (limit > INT32_MAX) {
+                ret = -1;
+                cli_err ("%s is too large.", key);
+                gf_log ("cli", GF_LOG_ERROR, "Value %s for %s is out of "
+                        "range", words[index], key);
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, key, (int32_t) limit);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not set "
+                        "%s in dictionary", key);
+                goto out;
+        }
+
+out :
+        return ret;
+}
+
+/* function cli_snap_config_parse
+ * Config Syntax : gluster snapshot config [volname]
+ * [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <count>]
+ *
+ * @words : individual words of the CLI command
+ * @wordcount: number of words; only 2 to 7 words are accepted
+ * @dict : request dictionary; receives "config-command", "volname" when
+ * a volume is named, and the limit keys written by
+ * cli_snap_config_limit_parse
+ * @state : CLI state handed to the confirmation prompt
+ *
+ * When both limits are given, snap-max-hard-limit must come first: the
+ * hard limit is looked for before the soft limit and nothing may follow
+ * the soft limit value.
+ *
+ return value: <0 on failure
+ 1 if user cancels the operation
+ 0 on success
+
+ NOTE : snap-max-soft-limit can only be set for system; it is rejected
+ when a volume name is present.
+*/
+int32_t
+cli_snap_config_parse (const char **words, int wordcount, dict_t *dict,
+ struct cli_state *state)
+{
+ int ret = -1;
+ gf_answer_t answer = GF_ANSWER_NO;
+ gf_boolean_t vol_presence = _gf_false;
+ struct snap_config_opt_vals_ *conf_vals = NULL;
+ int8_t hard_limit = 0;
+ int8_t soft_limit = 0;
+ int8_t config_type = -1;
+ const char *question = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot config);
+ * it is advanced as words are consumed */
+
+ GF_ASSERT (words);
+ GF_ASSERT (dict);
+ GF_ASSERT (state);
+
+ if ((wordcount < 2) || (wordcount > 7)) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Invalid wordcount(%d)", wordcount);
+ goto out;
+ }
+
+ if (wordcount == 2) { /* bare "snapshot config": display only */
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+
+ /* Check whether the 3rd word is volname: anything that is not one of
+ * the two limit keywords is treated as a volume name */
+ if (strcmp (words[cmdi], "snap-max-hard-limit") != 0
+ && strcmp (words[cmdi], "snap-max-soft-limit") != 0) {
+ ret = dict_set_str (dict, "volname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set volname");
+ goto out;
+ }
+ cmdi++;
+ vol_presence = _gf_true;
+
+ if (cmdi == wordcount) { /* "snapshot config <volname>": display only */
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+ }
+
+ config_type = GF_SNAP_CONFIG_TYPE_SET;
+
+ if (strcmp (words[cmdi], "snap-max-hard-limit") == 0) {
+ ret = cli_snap_config_limit_parse (words, dict, wordcount,
+ ++cmdi, "snap-max-hard-limit"); /* ++cmdi: index of the value */
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse snap "
+ "config hard limit");
+ goto out;
+ }
+ hard_limit = 1;
+
+ if (++cmdi == wordcount) { /* nothing after the hard limit value */
+ ret = 0;
+ goto set;
+ }
+ }
+
+ if (strcmp (words[cmdi], "snap-max-soft-limit") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err ("Soft limit cannot be set to individual "
+ "volumes.");
+ gf_log ("cli", GF_LOG_ERROR, "Soft limit cannot be "
+ "set to volumes");
+ goto out;
+ }
+
+ ret = cli_snap_config_limit_parse (words, dict, wordcount,
+ ++cmdi, "snap-max-soft-limit"); /* ++cmdi: index of the value */
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to parse snap "
+ "config soft limit");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) { /* trailing words are a syntax error */
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ soft_limit = 1;
+ } else {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ ret = 0; /* Success so far; ret is reassigned right below at "set" */
+
+set:
+ ret = dict_set_int32 (dict, "config-command", config_type);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to set "
+ "config-command");
+ goto out;
+ }
+
+ if (config_type == GF_SNAP_CONFIG_TYPE_SET) { /* changes need user confirmation */
+ conf_vals = snap_confopt_vals;
+ if (hard_limit && soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_BOTH].question;
+ } else if (soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_SOFT].question;
+ } else if (hard_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_HARD].question;
+ }
+
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1; /* positive value tells the caller the user declined */
+ gf_log ("cli", GF_LOG_DEBUG, "User cancelled "
+ "snapshot config operation");
+ }
+ }
+
+out:
+ return ret;
+}
+
+/* Reject a snapname that collides with one of the reserved words.
+ *
+ * @snapname: candidate snapshot name
+ * @opwords : NULL-terminated array of words that may not be used
+ *
+ * return value : -1 if snapname equals one of opwords (the user is told)
+ *                 0 otherwise
+ */
+int
+validate_snapname (const char *snapname, char **opwords) {
+        char          **reserved = NULL;
+
+        GF_ASSERT (snapname);
+        GF_ASSERT (opwords);
+
+        for (reserved = opwords; *reserved != NULL; reserved++) {
+                if (!strcmp (*reserved, snapname)) {
+                        cli_out ("\"%s\" cannot be a snapname", snapname);
+                        return -1;
+                }
+        }
+
+        return 0;
+}
+
+/* Parse a "snapshot ..." command line into a request dictionary.
+ *
+ * @words    : individual words of the CLI command; words[1] selects the
+ *             operation and may be any unambiguous match accepted by
+ *             str_getunamb
+ * @wordcount: number of words in the command
+ * @options  : on success receives the newly built dictionary
+ * @state    : CLI state, needed by operations that prompt the user
+ *
+ * Besides the keys written by the per-operation parsers, the dictionary
+ * gets "type" and, for everything except config, "hold_snap_locks".
+ *
+ * return value : <0 on failure (the dictionary is destroyed)
+ *                 1 if the user cancelled at a confirmation prompt
+ *                   (the dictionary is destroyed)
+ *                 0 on success
+ */
+int32_t
+cli_cmd_snapshot_parse (const char **words, int wordcount, dict_t **options,
+                        struct cli_state *state)
+{
+        int32_t             ret        = -1;
+        dict_t             *dict       = NULL;
+        gf1_cli_snapshot    type       = GF_SNAP_OPTION_TYPE_NONE;
+        char               *w          = NULL;
+        char               *opwords[]  = {"create", "delete", "restore", "start",
+                                          "stop", "list", "status", "config",
+                                          "info", NULL};
+        char               *invalid_snapnames[] = {"description", "force",
+                                                  "volume", NULL};
+
+        GF_ASSERT (words);
+        GF_ASSERT (options);
+        GF_ASSERT (state);
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        /* Lowest wordcount possible */
+        if (wordcount < 2) {
+                gf_log ("cli", GF_LOG_ERROR,
+                        "Invalid command: Not enough arguments");
+                goto out;
+        }
+
+        w = str_getunamb (words[1], opwords);
+        if (!w) {
+                /* Checks if the operation is a valid operation */
+                gf_log ("cli", GF_LOG_ERROR, "Opword Mismatch");
+                goto out;
+        }
+
+        /* "start" and "stop" are listed in opwords but have no type here;
+         * they leave type at NONE and are rejected by the switch below */
+        if (!strcmp (w, "create")) {
+                type = GF_SNAP_OPTION_TYPE_CREATE;
+        } else if (!strcmp (w, "list")) {
+                type = GF_SNAP_OPTION_TYPE_LIST;
+        } else if (!strcmp (w, "info")) {
+                type = GF_SNAP_OPTION_TYPE_INFO;
+        } else if (!strcmp (w, "delete")) {
+                type = GF_SNAP_OPTION_TYPE_DELETE;
+        } else if (!strcmp (w, "config")) {
+                type = GF_SNAP_OPTION_TYPE_CONFIG;
+        } else if (!strcmp (w, "restore")) {
+                type = GF_SNAP_OPTION_TYPE_RESTORE;
+        } else if (!strcmp (w, "status")) {
+                type = GF_SNAP_OPTION_TYPE_STATUS;
+        }
+
+        if (type != GF_SNAP_OPTION_TYPE_CONFIG) {
+                ret = dict_set_int32 (dict, "hold_snap_locks", _gf_true);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR,
+                                "Unable to set hold-snap-locks value "
+                                "as _gf_true");
+                        goto out;
+                }
+        }
+
+        /* Check which op is intended */
+        switch (type) {
+        case GF_SNAP_OPTION_TYPE_CREATE:
+        {
+                /* Syntax :
+                 * gluster snapshot create <snapname> <vol-name(s)>
+                 *                         [description <description>]
+                 *                         [force]
+                 */
+
+                /* In cases where the snapname is not given then
+                 * parsing fails & snapname cannot be "description",
+                 * "force" and "volume", that check is made here
+                 */
+                if (wordcount == 2){
+                        ret = -1;
+                        gf_log ("cli", GF_LOG_ERROR,
+                                "Invalid Syntax");
+                        goto out;
+                }
+
+                ret = validate_snapname (words[2], invalid_snapnames);
+                if (ret) {
+                        goto out;
+                }
+
+                ret = cli_snap_create_parse (dict, words, wordcount);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR,
+                                "create command parsing failed.");
+                        goto out;
+                }
+                break;
+        }
+        case GF_SNAP_OPTION_TYPE_INFO:
+        {
+                /* Syntax :
+                 * gluster snapshot info [(snapname | volume <volname>)]
+                 */
+                ret = cli_snap_info_parse (dict, words, wordcount);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+                                "snapshot info command");
+                        goto out;
+                }
+                break;
+        }
+
+        case GF_SNAP_OPTION_TYPE_LIST:
+        {
+                /* Syntax :
+                 * gluster snapshot list [volname]
+                 */
+
+                ret = cli_snap_list_parse (dict, words, wordcount);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+                                "snapshot list command");
+                        goto out;
+                }
+                break;
+        }
+
+        case GF_SNAP_OPTION_TYPE_DELETE:
+        {
+                /* Syntax :
+                 * gluster snapshot delete <snapname>
+                 */
+                ret = cli_snap_delete_parse (dict, words, wordcount,
+                                             state);
+                if (ret) {
+                        /* ret > 0 means the user declined the prompt;
+                         * that is not a parse error, so do not log it
+                         * as one */
+                        if (ret < 0)
+                                gf_log ("cli", GF_LOG_ERROR, "Failed to "
+                                        "parse snapshot delete command");
+                        goto out;
+                }
+                break;
+        }
+
+        case GF_SNAP_OPTION_TYPE_CONFIG:
+        {
+                /* snapshot config [volname] [snap-max-hard-limit <count>]
+                 *                           [snap-max-soft-limit <percent>] */
+                ret = cli_snap_config_parse (words, wordcount, dict,
+                                             state);
+                if (ret) {
+                        if (ret < 0)
+                                gf_log ("cli", GF_LOG_ERROR,
+                                        "config command parsing failed.");
+                        goto out;
+                }
+                /* "type" is stored once for every operation after the
+                 * switch, so it is not set separately here */
+                break;
+        }
+        case GF_SNAP_OPTION_TYPE_STATUS:
+        {
+                /* Syntax :
+                 * gluster snapshot status [(snapname |
+                 *                         volume <volname>)]
+                 */
+                ret = cli_snap_status_parse (dict, words, wordcount);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+                                "snapshot status command");
+                        goto out;
+                }
+                break;
+        }
+
+        case GF_SNAP_OPTION_TYPE_RESTORE:
+        {
+                /* Syntax:
+                 * snapshot restore <snapname>
+                 */
+                ret = cli_snap_restore_parse (dict, words, wordcount);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Failed to parse "
+                                "restore command");
+                        goto out;
+                }
+                break;
+        }
+        default:
+                /* ret may still be 0 from the "hold_snap_locks" insert
+                 * above; force a failure so an unsupported operation is
+                 * never reported as parsed */
+                ret = -1;
+                gf_log ("cli", GF_LOG_ERROR, "Opword Mismatch");
+                goto out;
+        }
+
+        ret = dict_set_int32 (dict, "type", type);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR,
+                        "Failed to set type.");
+                goto out;
+        }
+        /* If you got so far, input is valid */
+        ret = 0;
+out:
+        if (ret) {
+                if (dict)
+                        dict_destroy (dict);
+        } else
+                *options = dict;
+
+        return ret;
+}
diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
index 4ac1630e5..551312411 100644
--- a/cli/src/cli-cmd-peer.c
+++ b/cli/src/cli-cmd-peer.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -51,6 +41,7 @@ cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *dict = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
if (!(wordcount == 3)) {
cli_usage_out (word->pattern);
@@ -76,8 +67,10 @@ cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (ret == 1) {
ret = 0;
} else {
+ cli_out ("%s is an invalid address", words[2]);
cli_usage_out (word->pattern);
parse_error = 1;
+ ret = -1;
goto out;
}
/* if (words[3]) {
@@ -86,6 +79,9 @@ cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
*/
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
@@ -114,6 +110,7 @@ cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word,
int flags = 0;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
if ((wordcount < 3) || (wordcount > 4)) {
cli_usage_out (word->pattern);
@@ -153,6 +150,8 @@ cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (ret)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
@@ -192,7 +191,7 @@ cli_cmd_peer_status_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
if (proc->fn) {
- ret = proc->fn (frame, THIS, (char *)words[1] );
+ ret = proc->fn (frame, THIS, (void *)GF_CLI_LIST_PEERS);
}
out:
@@ -207,6 +206,45 @@ out:
return ret;
}
+/* Callback for the "pool list" command.
+ *
+ * Requires exactly two words.  Invokes the GLUSTER_CLI_LIST_FRIENDS
+ * procedure with GF_CLI_LIST_POOL_NODES as its argument.
+ *
+ * return value : 0 on success, non-zero on failure
+ */
+int
+cli_cmd_pool_list_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                       const char **words, int wordcount)
+{
+        int                      ret         = -1;
+        int                      sent        = 0;
+        int                      parse_error = 0;
+        call_frame_t            *frame       = NULL;
+        rpc_clnt_procedure_t    *proc        = NULL;
+
+        if (wordcount != 2) {
+                cli_usage_out (word->pattern);
+                parse_error = 1;
+                goto out;
+        }
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
+
+        frame = create_frame (THIS, THIS->ctx->pool);
+        /* without a frame, ret stays -1 and the failure is reported below */
+        if (frame && proc->fn) {
+                ret = proc->fn (frame, THIS,
+                                (void *)GF_CLI_LIST_POOL_NODES);
+        }
+
+out:
+        if (ret) {
+                cli_cmd_sent_status_get (&sent);
+                /* stay quiet if usage was already printed or the request
+                 * went out */
+                if (!sent && !parse_error)
+                        cli_err ("pool list: command execution failed");
+        }
+
+        CLI_STACK_DESTROY (frame);
+
+        return ret;
+}
+
struct cli_cmd cli_probe_cmds[] = {
{ "peer probe <HOSTNAME>",
cli_cmd_peer_probe_cbk,
@@ -224,6 +262,10 @@ struct cli_cmd cli_probe_cmds[] = {
cli_cmd_peer_help_cbk,
"Help command for peer "},
+ { "pool list",
+ cli_cmd_pool_list_cbk,
+ "list all the nodes in the pool (including localhost)"},
+
{ NULL, NULL, NULL }
};
diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
new file mode 100644
index 000000000..de492d683
--- /dev/null
+++ b/cli/src/cli-cmd-snapshot.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "cli.h"
+#include "cli-cmd.h"
+
+extern rpc_clnt_prog_t *cli_rpc_prog;
+
+int
+cli_cmd_snapshot_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+/* Callback shared by all "snapshot ..." commands except help.
+ *
+ * Parses the command with cli_cmd_snapshot_parse and, when parsing
+ * succeeds, invokes the GLUSTER_CLI_SNAP procedure with the parsed
+ * options.  A negative parse result prints the usage pattern; a positive
+ * one (user cancelled) is turned into success without sending anything.
+ *
+ * return value : 0 on success or user cancel, non-zero on failure
+ */
+int
+cli_cmd_snapshot_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                      const char **words, int wordcount)
+{
+        int                      ret       = -1;
+        int                      parse_err = 0;
+        dict_t                  *options   = NULL;
+        rpc_clnt_procedure_t    *proc      = NULL;
+        call_frame_t            *frame     = NULL;
+        cli_local_t             *local     = NULL;
+
+        proc = &cli_rpc_prog->proctable [GLUSTER_CLI_SNAP];
+        if (proc == NULL)
+                goto out;
+
+        frame = create_frame (THIS, THIS->ctx->pool);
+        if (frame == NULL)
+                goto out;
+
+        /* Parses the command entered by the user */
+        ret = cli_cmd_snapshot_parse (words, wordcount, &options, state);
+        if (ret < 0) {
+                cli_usage_out (word->pattern);
+                parse_err = 1;
+                goto out;
+        }
+        if (ret > 0) {
+                /* User might have cancelled the snapshot operation */
+                ret = 0;
+                goto out;
+        }
+
+        CLI_LOCAL_INIT (local, words, frame, options);
+
+        if (proc->fn)
+                ret = proc->fn (frame, THIS, options);
+
+out:
+        /* usage output already covers parse errors */
+        if (ret && parse_err == 0)
+                cli_out ("Snapshot command failed");
+
+        CLI_STACK_DESTROY (frame);
+
+        return ret;
+}
+
+struct cli_cmd snapshot_cmds[] = { /* snapshot command table: { pattern, callback, description } */
+ { "snapshot help",
+ cli_cmd_snapshot_help_cbk,
+ "display help for snapshot commands"
+ },
+ { "snapshot create <snapname> <volname(s)> [description <description>] [force]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Create."
+ },
+ { "snapshot restore <snapname>",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Restore."
+ },
+ { "snapshot status [(snapname | volume <volname>)]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Status."
+ },
+ { "snapshot info [(snapname | volume <volname>)]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Info."
+ },
+ { "snapshot list [volname]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot List."
+ },
+ {"snapshot config [volname] [snap-max-hard-limit <count>] [snap-max-soft-limit <percent>]",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Config."
+ },
+ {"snapshot delete <snapname>",
+ cli_cmd_snapshot_cbk,
+ "Snapshot Delete."
+ },
+ { NULL, NULL, NULL } /* terminator: the loops over this table stop at a NULL pattern */
+};
+
+/* Callback for "snapshot help": prints "<pattern> - <description>" for
+ * every entry of snapshot_cmds that is not disabled.  Always returns 0.
+ */
+int
+cli_cmd_snapshot_help_cbk (struct cli_state *state,
+                           struct cli_cmd_word *in_word,
+                           const char **words,
+                           int wordcount)
+{
+        struct cli_cmd  *entry = snapshot_cmds;
+
+        /* the table ends with an entry whose pattern is NULL */
+        while (entry->pattern) {
+                if (entry->disable == _gf_false)
+                        cli_out ("%s - %s", entry->pattern, entry->desc);
+                entry++;
+        }
+
+        return 0;
+}
+
+/* Register every entry of snapshot_cmds in the command tree of state.
+ * Stops at the first registration failure.
+ *
+ * return value : 0 on success, otherwise the failing cli_cmd_register result
+ */
+int
+cli_cmd_snapshot_register (struct cli_state *state)
+{
+        int              ret   = 0;
+        struct cli_cmd  *entry = NULL;
+
+        for (entry = snapshot_cmds; entry->pattern; entry++) {
+                ret = cli_cmd_register (&state->tree, entry);
+                if (ret)
+                        break;
+        }
+
+        return ret;
+}
diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
index 25938b897..8cfa5e70c 100644
--- a/cli/src/cli-cmd-system.c
+++ b/cli/src/cli-cmd-system.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -41,6 +31,12 @@ extern rpc_clnt_prog_t *cli_rpc_prog;
int cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
+int cli_cmd_copy_file_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+int cli_cmd_sys_exec_cbk (struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
int
cli_cmd_getspec_cbk (struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
@@ -288,6 +284,114 @@ cli_cmd_umount_cbk (struct cli_state *state, struct cli_cmd_word *word,
return ret;
}
+/*
+ * Handler for the "system:: uuid get" command.
+ *
+ * Requires exactly three words; anything else prints the usage pattern.
+ * Creates a frame and an empty dict, attaches both to the frame local via
+ * CLI_LOCAL_INIT and calls the GLUSTER_CLI_UUID_GET entry of the CLI proc
+ * table with that dict.
+ *
+ * Returns whatever the procedure returns, or -1 when nothing was dispatched
+ * (usage error, frame/dict allocation failure, or no handler registered).
+ * "uuid get failed" is printed only when the request was not sent and the
+ * failure was not a usage error.
+ */
+int
+cli_cmd_uuid_get_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                      const char **words, int wordcount)
+{
+        xlator_t             *this      = THIS;
+        rpc_clnt_procedure_t *proc      = NULL;
+        call_frame_t         *frame     = NULL;
+        cli_local_t          *local     = NULL;
+        dict_t               *dict      = NULL;
+        int                   bad_usage = 0;
+        int                   was_sent  = 0;
+        int                   ret       = -1;
+
+        if (wordcount != 3) {
+                cli_usage_out (word->pattern);
+                bad_usage = 1;
+                goto out;
+        }
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_GET];
+
+        frame = create_frame (this, this->ctx->pool);
+        if (frame == NULL)
+                goto out;
+
+        dict = dict_new ();
+        if (dict == NULL)
+                goto out;
+
+        CLI_LOCAL_INIT (local, words, frame, dict);
+
+        if (proc->fn != NULL)
+                ret = proc->fn (frame, this, dict);
+
+out:
+        if (ret != 0) {
+                cli_cmd_sent_status_get (&was_sent);
+                if (!was_sent && !bad_usage)
+                        cli_out ("uuid get failed");
+        }
+
+        /* NOTE(review): CLI_LOCAL_INIT also stored this dict in local->dict.
+         * Other callbacks changed by this patch drop their explicit
+         * dict_unref once they use CLI_LOCAL_INIT; confirm that the frame
+         * teardown below does not release the same reference a second time. */
+        if (dict != NULL)
+                dict_unref (dict);
+
+        CLI_STACK_DESTROY (frame);
+        return ret;
+}
+
+/*
+ * Handler for the "system:: uuid reset" command.
+ *
+ * Requires exactly three words; anything else prints the usage pattern.
+ * After the frame and dict are set up the user is asked to confirm; a "no"
+ * answer ends the command successfully without contacting glusterd.
+ * Otherwise the GLUSTER_CLI_UUID_RESET procedure is called with the (empty)
+ * dict.
+ *
+ * Returns 0 when the user declines, the procedure's return value when it
+ * ran, and -1 when nothing was dispatched. "uuid reset failed" is printed
+ * only when the request was not sent and the failure was not a usage error.
+ */
+int
+cli_cmd_uuid_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                        const char **words, int wordcount)
+{
+        char                 *question  = "Resetting uuid changes the uuid of local glusterd. "
+                                          "Do you want to continue?";
+        gf_answer_t           answer    = GF_ANSWER_NO;
+        rpc_clnt_procedure_t *proc      = NULL;
+        call_frame_t         *frame     = NULL;
+        cli_local_t          *local     = NULL;
+        dict_t               *dict      = NULL;
+        xlator_t             *this      = NULL;
+        int                   bad_usage = 0;
+        int                   was_sent  = 0;
+        int                   ret       = -1;
+
+        if (wordcount != 3) {
+                cli_usage_out (word->pattern);
+                bad_usage = 1;
+                goto out;
+        }
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_RESET];
+        this = THIS;
+
+        frame = create_frame (this, this->ctx->pool);
+        if (frame == NULL)
+                goto out;
+
+        dict = dict_new ();
+        if (dict == NULL)
+                goto out;       /* ret is still -1 here */
+
+        CLI_LOCAL_INIT (local, words, frame, dict);
+
+        answer = cli_cmd_get_confirmation (state, question);
+        if (answer == GF_ANSWER_NO) {
+                /* Declining is not an error. */
+                ret = 0;
+                goto out;
+        }
+
+        /* The dict carries no keys; it is passed as the request payload. */
+        if (proc->fn != NULL)
+                ret = proc->fn (frame, this, dict);
+
+out:
+        if (ret != 0) {
+                cli_cmd_sent_status_get (&was_sent);
+                if (!was_sent && !bad_usage)
+                        cli_out ("uuid reset failed");
+        }
+
+        CLI_STACK_DESTROY (frame);
+
+        return ret;
+}
+
struct cli_cmd cli_system_cmds[] = {
{ "system:: getspec <VOLID>",
cli_cmd_getspec_cbk,
@@ -313,14 +417,163 @@ struct cli_cmd cli_system_cmds[] = {
cli_cmd_umount_cbk,
"request an umount"},
+ { "system:: uuid get",
+ cli_cmd_uuid_get_cbk,
+ "get uuid of glusterd"},
+
+ { "system:: uuid reset",
+ cli_cmd_uuid_reset_cbk,
+ "reset the uuid of glusterd"},
+
{ "system:: help",
cli_cmd_system_help_cbk,
"display help for system commands"},
+ { "system:: copy file [<filename>]",
+ cli_cmd_copy_file_cbk,
+ "Copy file from current node's $working_dir to "
+ "$working_dir of all cluster nodes"},
+
+ { "system:: execute <command> <args>",
+ cli_cmd_sys_exec_cbk,
+ "Execute the command on all the nodes "
+ "in the cluster and display their output."},
+
{ NULL, NULL, NULL }
};
int
+/*
+ * Handler for "system:: execute <command> <args>".
+ *
+ * words[2] is split on spaces: the first token becomes the "command" key and
+ * any further tokens become "cmd_arg_1", "cmd_arg_2", ...  The remaining
+ * words (words[3] onward) are appended to the same numbered sequence. The
+ * total is stored as "cmd_args_count" and "volname" is set to "N/A" before
+ * the GLUSTER_CLI_SYS_EXEC procedure is called with the dict.
+ *
+ * Note that strtok_r() tokenises words[2] in place, so the caller's word is
+ * modified, and the dict values point into the caller's words.
+ *
+ * Returns the procedure's return value, or non-zero when the request could
+ * not be built (usage error, empty command, allocation or dict failure).
+ * If the proc table has no handler the function returns 0 without sending
+ * anything, as before.
+ */
+cli_cmd_sys_exec_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                      const char **words, int wordcount)
+{
+        char                  cmd_arg_name[PATH_MAX] = "";
+        char                 *command                = NULL;
+        char                 *saveptr                = NULL;
+        char                 *tmp                    = NULL;
+        int                   ret                    = -1;
+        int                   i                      = -1;
+        int                   cmd_args_count         = 0;
+        int                   in_cmd_args_count      = 0;
+        int                   dict_in_local          = 0;
+        rpc_clnt_procedure_t *proc                   = NULL;
+        call_frame_t         *frame                  = NULL;
+        dict_t               *dict                   = NULL;
+        cli_local_t          *local                  = NULL;
+
+        if (wordcount < 3) {
+                cli_usage_out (word->pattern);
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        command = strtok_r ((char *)words[2], " ", &saveptr);
+        if (!command) {
+                /* words[2] was empty or only spaces: nothing to execute. */
+                cli_usage_out (word->pattern);
+                goto out;
+        }
+
+        /* Extra tokens inside words[2] are the leading arguments. */
+        while ((tmp = strtok_r (NULL, " ", &saveptr)) != NULL) {
+                in_cmd_args_count++;
+                snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+                          "cmd_arg_%d", in_cmd_args_count);
+                ret = dict_set_str (dict, cmd_arg_name, tmp);
+                if (ret) {
+                        gf_log ("", GF_LOG_ERROR, "Unable to set "
+                                "%s in dict", cmd_arg_name);
+                        goto out;
+                }
+        }
+
+        cmd_args_count = wordcount - 3;
+
+        ret = dict_set_str (dict, "command", command);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to set command in dict");
+                goto out;
+        }
+
+        /* The remaining CLI words continue the same argument numbering. */
+        for (i = 1; i <= cmd_args_count; i++) {
+                in_cmd_args_count++;
+                snprintf (cmd_arg_name, sizeof(cmd_arg_name),
+                          "cmd_arg_%d", in_cmd_args_count);
+                ret = dict_set_str (dict, cmd_arg_name,
+                                    (char *)words[2+i]);
+                if (ret) {
+                        gf_log ("", GF_LOG_ERROR, "Unable to set %s in dict",
+                                cmd_arg_name);
+                        goto out;
+                }
+        }
+
+        ret = dict_set_int32 (dict, "cmd_args_count", in_cmd_args_count);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR,
+                        "Unable to set cmd_args_count in dict");
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "volname", "N/A");
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to set volname in dict");
+                goto out;
+        }
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYS_EXEC];
+        if (proc->fn) {
+                frame = create_frame (THIS, THIS->ctx->pool);
+                if (!frame) {
+                        /* ret is 0 from the last dict_set_str; do not
+                         * report success when nothing was sent. */
+                        ret = -1;
+                        goto out;
+                }
+                CLI_LOCAL_INIT (local, words, frame, dict);
+                /* Once stored in the frame local, the dict is left to the
+                 * frame teardown, like the other callbacks that dropped
+                 * their dict_unref when adopting CLI_LOCAL_INIT.
+                 * NOTE(review): confirm the local cleanup unrefs local->dict. */
+                if (local)
+                        dict_in_local = 1;
+                ret = proc->fn (frame, THIS, (void*)dict);
+        }
+out:
+        /* Release the dict on every path where it never reached the local. */
+        if (dict && !dict_in_local)
+                dict_unref (dict);
+
+        /* Safe with a NULL frame: the macro checks before destroying. */
+        CLI_STACK_DESTROY (frame);
+        return ret;
+}
+
+/*
+ * Handler for "system:: copy file [<filename>]".
+ *
+ * Requires exactly four words; words[3] is stored under "source" and
+ * "volname" is set to "N/A" before the GLUSTER_CLI_COPY_FILE procedure is
+ * called with the dict.
+ *
+ * Returns the procedure's return value, or non-zero when the request could
+ * not be built (usage error, allocation or dict failure). If the proc table
+ * has no handler the function returns 0 without sending anything, as
+ * before.
+ */
+int
+cli_cmd_copy_file_cbk (struct cli_state *state, struct cli_cmd_word *word,
+                       const char **words, int wordcount)
+{
+        int                   ret           = -1;
+        int                   dict_in_local = 0;
+        rpc_clnt_procedure_t *proc          = NULL;
+        call_frame_t         *frame         = NULL;
+        char                 *filename      = NULL;
+        dict_t               *dict          = NULL;
+        cli_local_t          *local         = NULL;
+
+        if (wordcount != 4) {
+                cli_usage_out (word->pattern);
+                goto out;
+        }
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        filename = (char*)words[3];
+        ret = dict_set_str (dict, "source", filename);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to set filename in dict");
+                /* Do not send a request that lacks its source file. */
+                goto out;
+        }
+
+        ret = dict_set_str (dict, "volname", "N/A");
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to set volname in dict");
+                goto out;
+        }
+
+        proc = &cli_rpc_prog->proctable[GLUSTER_CLI_COPY_FILE];
+        if (proc->fn) {
+                frame = create_frame (THIS, THIS->ctx->pool);
+                if (!frame) {
+                        /* ret is 0 from the last dict_set_str; do not
+                         * report success when nothing was sent. */
+                        ret = -1;
+                        goto out;
+                }
+                CLI_LOCAL_INIT (local, words, frame, dict);
+                /* Once stored in the frame local, the dict is left to the
+                 * frame teardown, like the other callbacks that dropped
+                 * their dict_unref when adopting CLI_LOCAL_INIT.
+                 * NOTE(review): confirm the local cleanup unrefs local->dict. */
+                if (local)
+                        dict_in_local = 1;
+                ret = proc->fn (frame, THIS, (void*)dict);
+        }
+out:
+        /* Release the dict on every path where it never reached the local. */
+        if (dict && !dict_in_local)
+                dict_unref (dict);
+
+        /* Safe with a NULL frame: the macro checks before destroying. */
+        CLI_STACK_DESTROY (frame);
+        return ret;
+}
+
+int
cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 704f9dddb..100be0b73 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -121,6 +111,12 @@ cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
int sent = 0;
int parse_error = 0;
dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = "Sync volume may make data "
+ "inaccessible while the sync "
+ "is in progress. Do you want "
+ "to continue?";
if ((wordcount < 3) || (wordcount > 4)) {
cli_usage_out (word->pattern);
@@ -155,12 +151,22 @@ cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ if (!(state->mode & GLUSTER_MODE_SCRIPT)) {
+ answer = cli_cmd_get_confirmation (state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
+
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYNC_VOLUME];
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
@@ -172,9 +178,6 @@ out:
cli_out ("Volume sync failed");
}
- if (dict)
- dict_unref (dict);
-
CLI_STACK_DESTROY (frame);
return ret;
@@ -317,13 +320,11 @@ found_bad_brick_order:
out:
ai_list_tmp2 = NULL;
i = 0;
- if (brick_list_dup)
- GF_FREE (brick_list_dup);
+ GF_FREE (brick_list_dup);
list_for_each_entry (ai_list_tmp1, &ai_list->list, list) {
if (ai_list_tmp1->info)
freeaddrinfo (ai_list_tmp1->info);
- if (ai_list_tmp2)
- free (ai_list_tmp2);
+ free (ai_list_tmp2);
ai_list_tmp2 = ai_list_tmp1;
}
free (ai_list_tmp2);
@@ -344,7 +345,7 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
int32_t brick_count = 0;
int32_t sub_count = 0;
int32_t type = GF_CLUSTER_TYPE_NONE;
-
+ cli_local_t *local = NULL;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
@@ -390,13 +391,23 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
}
+
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -421,6 +432,8 @@ cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
const char *question = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
question = "Deleting volume will erase all information about the volume. "
"Do you want to continue?";
@@ -430,6 +443,10 @@ cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
if (wordcount != 3) {
cli_usage_out (word->pattern);
parse_error = 1;
@@ -445,8 +462,17 @@ cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
volname = (char *)words[2];
+ ret = dict_set_str (dict, "volname", volname);
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING, "dict set failed");
+ goto out;
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
- ret = proc->fn (frame, THIS, volname);
+ ret = proc->fn (frame, THIS, dict);
}
out:
@@ -472,6 +498,7 @@ cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
int parse_error = 0;
dict_t *dict = NULL;
int flags = 0;
+ cli_local_t *local = NULL;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
@@ -522,13 +549,13 @@ cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
out:
- if (dict)
- dict_unref (dict);
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -545,7 +572,7 @@ cli_cmd_get_confirmation (struct cli_state *state, const char *question)
{
char answer[5] = {'\0', };
char flush = '\0';
- int len = 0;
+ size_t len;
if (state->mode & GLUSTER_MODE_SCRIPT)
return GF_ANSWER_YES;
@@ -559,7 +586,7 @@ cli_cmd_get_confirmation (struct cli_state *state, const char *question)
len = strlen (answer);
- if (answer [len - 1] == '\n'){
+ if (len && answer [len - 1] == '\n'){
answer [--len] = '\0';
} else {
do{
@@ -595,6 +622,7 @@ cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
int parse_error = 0;
dict_t *dict = NULL;
char *volname = NULL;
+ cli_local_t *local = NULL;
const char *question = "Stopping volume will make its data inaccessible. "
"Do you want to continue?";
@@ -644,6 +672,8 @@ cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
@@ -654,8 +684,6 @@ out:
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume stop on '%s' failed", volname);
}
- if (dict)
- dict_unref (dict);
CLI_STACK_DESTROY (frame);
@@ -730,6 +758,7 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *dict = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
#ifdef GF_SOLARIS_HOST_OS
cli_out ("Command not supported on Solaris");
goto out;
@@ -739,70 +768,22 @@ cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
- dict = dict_new ();
- if (!dict)
- goto out;
+ ret = cli_cmd_volume_defrag_parse (words, wordcount, &dict);
- if (!((wordcount == 4) || (wordcount == 5))) {
+ if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
- goto out;
- }
-
- if (wordcount == 4) {
- if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&
- strcmp (words[3], "status")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- } else {
- if (strcmp (words[3], "fix-layout") &&
- strcmp (words[3], "start")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- }
-
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- if (ret)
- goto out;
-
- if (wordcount == 4) {
- ret = dict_set_str (dict, "command", (char *)words[3]);
- if (ret)
- goto out;
- }
- if (wordcount == 5) {
- if ((strcmp (words[3], "fix-layout") ||
- strcmp (words[4], "start")) &&
- (strcmp (words[3], "start") ||
- strcmp (words[4], "force"))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str (dict, "option", (char *)words[4]);
- if (ret)
- goto out;
- ret = dict_set_str (dict, "command", (char *)words[3]);
- if (ret)
- goto out;
}
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, dict);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, dict);
}
out:
- if (dict)
- dict_destroy (dict);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -820,11 +801,11 @@ cli_cmd_volume_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
{
int sent = 0;
int parse_error = 0;
-
int ret = -1;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *options = NULL;
+ cli_local_t *local = NULL;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
@@ -833,21 +814,19 @@ cli_cmd_volume_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
ret = cli_cmd_volume_reset_parse (words, wordcount, &options);
-
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -871,6 +850,7 @@ cli_cmd_volume_profile_cbk (struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *options = NULL;
+ cli_local_t *local = NULL;
ret = cli_cmd_volume_profile_parse (words, wordcount, &options);
@@ -886,14 +866,13 @@ cli_cmd_volume_profile_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -917,6 +896,8 @@ cli_cmd_volume_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
@@ -924,22 +905,25 @@ cli_cmd_volume_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
- ret = cli_cmd_volume_set_parse (words, wordcount, &options);
-
+ ret = cli_cmd_volume_set_parse (words, wordcount, &options, &op_errstr);
if (ret) {
- cli_usage_out (word->pattern);
+ if (op_errstr) {
+ cli_err ("%s", op_errstr);
+ GF_FREE (op_errstr);
+ } else
+ cli_usage_out (word->pattern);
+
parse_error = 1;
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -964,6 +948,7 @@ cli_cmd_volume_add_brick_cbk (struct cli_state *state,
int sent = 0;
int parse_error = 0;
gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
const char *question = "Changing the 'stripe count' of the volume is "
"not a supported feature. In some cases it may result in data "
@@ -976,7 +961,6 @@ cli_cmd_volume_add_brick_cbk (struct cli_state *state,
goto out;
ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
-
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
@@ -994,16 +978,24 @@ cli_cmd_volume_add_brick_cbk (struct cli_state *state,
}
}
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
+
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -1027,6 +1019,7 @@ cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
call_frame_t *frame = NULL;
dict_t *options = NULL;
gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
const char *question = "Disabling quota will delete all the quota "
"configuration. Do you want to continue?";
@@ -1043,6 +1036,7 @@ cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
}
ret = cli_cmd_quota_parse (words, wordcount, &options);
+
if (ret < 0) {
cli_usage_out (word->pattern);
parse_err = 1;
@@ -1054,13 +1048,12 @@ cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn)
ret = proc->fn (frame, THIS, options);
out:
- if (options)
- dict_unref (options);
-
if (ret && parse_err == 0)
cli_out ("Quota command failed");
@@ -1083,6 +1076,7 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
int sent = 0;
int parse_error = 0;
int need_question = 0;
+ cli_local_t *local = NULL;
const char *question = "Removing brick(s) can result in data loss. "
"Do you want to Continue?";
@@ -1093,7 +1087,6 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options,
&need_question);
-
if (ret) {
cli_usage_out (word->pattern);
parse_error = 1;
@@ -1111,6 +1104,8 @@ cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
@@ -1122,9 +1117,6 @@ out:
cli_out ("Volume remove-brick failed");
}
- if (options)
- dict_unref (options);
-
CLI_STACK_DESTROY (frame);
return ret;
@@ -1143,6 +1135,7 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
#ifdef GF_SOLARIS_HOST_OS
cli_out ("Command not supported on Solaris");
@@ -1162,14 +1155,22 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
goto out;
}
+ if (state->mode & GLUSTER_MODE_SCRIPT) {
+ ret = dict_set_int32 (options, "force", _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to set force"
+ "option");
+ goto out;
+ }
+ }
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -1202,6 +1203,7 @@ cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
ret = cli_cmd_volume_top_parse (words, wordcount, &options);
@@ -1217,14 +1219,13 @@ cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (!frame)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_unref (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
@@ -1248,6 +1249,7 @@ cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
if (!((wordcount == 4) || (wordcount == 5))) {
cli_usage_out (word->pattern);
@@ -1265,20 +1267,18 @@ cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
if (ret)
goto out;
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
out:
- if (options)
- dict_destroy (options);
-
if (ret) {
cli_cmd_sent_status_get (&sent);
if ((sent == 0) && (parse_error == 0))
cli_out ("Volume log rotate failed");
}
-
CLI_STACK_DESTROY (frame);
return ret;
@@ -1353,6 +1353,7 @@ cli_cmd_volume_gsync_set_cbk (struct cli_state *state, struct cli_cmd_word *word
dict_t *options = NULL;
rpc_clnt_procedure_t *proc = NULL;
call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
proc = &cli_rpc_prog->proctable [GLUSTER_CLI_GSYNC_SET];
if (proc == NULL) {
@@ -1373,13 +1374,12 @@ cli_cmd_volume_gsync_set_cbk (struct cli_state *state, struct cli_cmd_word *word
goto out;
}
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn)
ret = proc->fn (frame, THIS, options);
out:
- if (options)
- dict_unref (options);
-
if (ret && parse_err == 0)
cli_out (GEOREP" command failed");
@@ -1398,6 +1398,7 @@ cli_cmd_volume_status_cbk (struct cli_state *state,
call_frame_t *frame = NULL;
dict_t *dict = NULL;
uint32_t cmd = 0;
+ cli_local_t *local = NULL;
ret = cli_cmd_volume_status_parse (words, wordcount, &dict);
@@ -1425,12 +1426,11 @@ cli_cmd_volume_status_cbk (struct cli_state *state,
if (!frame)
goto out;
- ret = proc->fn (frame, THIS, dict);
+ CLI_LOCAL_INIT (local, words, frame, dict);
- out:
- if (dict)
- dict_unref (dict);
+ ret = proc->fn (frame, THIS, dict);
+out:
CLI_STACK_DESTROY (frame);
return ret;
@@ -1523,7 +1523,10 @@ void
cli_print_detailed_status (cli_volume_status_t *status)
{
cli_out ("%-20s : %-20s", "Brick", status->brick);
- cli_out ("%-20s : %-20d", "Port", status->port);
+ if (status->online)
+ cli_out ("%-20s : %-20d", "Port", status->port);
+ else
+ cli_out ("%-20s : %-20s", "Port", "N/A");
cli_out ("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N');
cli_out ("%-20s : %-20s", "Pid", status->pid_str);
@@ -1582,31 +1585,34 @@ int
cli_print_brick_status (cli_volume_status_t *status)
{
int fieldlen = CLI_VOL_STATUS_BRICK_LEN;
- char buf[80] = {0,};
int bricklen = 0;
- int i = 0;
char *p = NULL;
int num_tabs = 0;
- bricklen = strlen (status->brick);
p = status->brick;
+ bricklen = strlen (p);
while (bricklen > 0) {
if (bricklen > fieldlen) {
- i++;
- strncpy (buf, p, min (fieldlen, (sizeof (buf)-1)));
- buf[strlen(buf) + 1] = '\0';
- cli_out ("%s", buf);
- p = status->brick + i * fieldlen;
+ cli_out ("%.*s", fieldlen, p);
+ p += fieldlen;
bricklen -= fieldlen;
} else {
num_tabs = (fieldlen - bricklen) / CLI_TAB_LENGTH + 1;
printf ("%s", p);
while (num_tabs-- != 0)
printf ("\t");
- if (status->port)
- cli_out ("%d\t%c\t%s",
- status->port, status->online?'Y':'N',
- status->pid_str);
+ if (status->port) {
+ if (status->online)
+ cli_out ("%d\t%c\t%s",
+ status->port,
+ status->online?'Y':'N',
+ status->pid_str);
+ else
+ cli_out ("%s\t%c\t%s",
+ "N/A",
+ status->online?'Y':'N',
+ status->pid_str);
+ }
else
cli_out ("%s\t%c\t%s",
"N/A", status->online?'Y':'N',
@@ -1629,6 +1635,7 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
int parse_error = 0;
dict_t *options = NULL;
xlator_t *this = NULL;
+ cli_local_t *local = NULL;
this = THIS;
frame = create_frame (this, this->ctx->pool);
@@ -1650,6 +1657,8 @@ cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
@@ -1661,9 +1670,6 @@ out:
cli_out ("Volume heal failed");
}
- if (options)
- dict_unref (options);
-
CLI_STACK_DESTROY (frame);
return ret;
@@ -1679,6 +1685,7 @@ cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
@@ -1707,6 +1714,9 @@ cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word
goto out;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
@@ -1764,6 +1774,7 @@ cli_cmd_volume_clearlocks_cbk (struct cli_state *state,
dict_t *options = NULL;
int sent = 0;
int parse_error = 0;
+ cli_local_t *local = NULL;
frame = create_frame (THIS, THIS->ctx->pool);
if (!frame)
@@ -1793,6 +1804,9 @@ cli_cmd_volume_clearlocks_cbk (struct cli_state *state,
goto out;
proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
+
+ CLI_LOCAL_INIT (local, words, frame, options);
+
if (proc->fn) {
ret = proc->fn (frame, THIS, options);
}
@@ -1814,7 +1828,13 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_info_cbk,
"list information of all volumes"},
- { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ...",
+ { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] "
+ "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK>"
+#ifdef HAVE_BD_XLATOR
+ "?<vg_name>"
+#endif
+ "... [force]",
+
cli_cmd_volume_create_cbk,
"create a new volume of specified type with mentioned bricks"},
@@ -1834,11 +1854,11 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_rename_cbk,
"rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
- { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ...",
+ { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ... [force]",
cli_cmd_volume_add_brick_cbk,
"add brick to volume <VOLNAME>"},
- { "volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ... {start|stop|status|commit|force}",
+ { "volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ... [start|stop|status|commit|force]",
cli_cmd_volume_remove_brick_cbk,
"remove brick from volume <VOLNAME>"},
@@ -1846,7 +1866,7 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_volume_defrag_cbk,
"rebalance operations"},
- { "volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> {start|pause|abort|status|commit [force]}",
+ { "volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> {start [force]|pause|abort|status|commit [force]}",
cli_cmd_volume_replace_brick_cbk,
"replace-brick operations"},
@@ -1875,13 +1895,14 @@ struct cli_cmd volume_cmds[] = {
"reset all the reconfigured options"},
#if (SYNCDAEMON_COMPILE)
- {"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {start|stop|config|status|log-rotate} [options...]",
+ {"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {create [push-pem] [force]"
+ "|start [force]|stop [force]|config|status [detail]|delete} [options...]",
cli_cmd_volume_gsync_set_cbk,
"Geo-sync operations",
cli_cmd_check_gsync_exists_cbk},
#endif
- { "volume profile <VOLNAME> {start|info|stop} [nfs]",
+ { "volume profile <VOLNAME> {start|stop|info [nfs]}",
cli_cmd_volume_profile_cbk,
"volume profile operations"},
@@ -1889,18 +1910,17 @@ struct cli_cmd volume_cmds[] = {
cli_cmd_quota_cbk,
"quota translator specific operations"},
- { "volume top <VOLNAME> {[open|read|write|opendir|readdir [nfs]] "
- "|[read-perf|write-perf [nfs|{bs <size> count <count>}]]"
- "|[clear [nfs]]} [brick <brick>] [list-cnt <count>]",
+ { "volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] |\n"
+ "volume top <VOLNAME> {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>]",
cli_cmd_volume_top_cbk,
"volume top operations"},
{ "volume status [all | <VOLNAME> [nfs|shd|<BRICK>]]"
- " [detail|clients|mem|inode|fd|callpool]",
+ " [detail|clients|mem|inode|fd|callpool|tasks]",
cli_cmd_volume_status_cbk,
"display status of all or specified volume(s)/brick"},
- { "volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]",
+ { "volume heal <VOLNAME> [{full | statistics {heal-count {replica <hostname:brickname>}} |info {healed | heal-failed | split-brain}}]",
cli_cmd_volume_heal_cbk,
"self-heal commands on volume specified by <VOLNAME>"},
diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c
index f2b434ac7..b81f75b5b 100644
--- a/cli/src/cli-cmd.c
+++ b/cli/src/cli-cmd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -211,8 +201,7 @@ cli_cmd_process_line (struct cli_state *state, const char *text)
ret = cli_cmd_process (state, count, tokens);
out:
- if (copy)
- free (copy);
+ free (copy);
if (tokens)
cli_cmd_tokens_destroy (tokens);
@@ -242,6 +231,9 @@ cli_cmds_register (struct cli_state *state)
if (ret)
goto out;
+ ret = cli_cmd_snapshot_register (state);
+ if (ret)
+ goto out;
out:
return ret;
}
@@ -371,8 +363,11 @@ cli_cmd_submit (void *req, call_frame_t *frame,
int ret = -1;
unsigned timeout = 0;
- timeout = (GLUSTER_CLI_PROFILE_VOLUME == procnum) ?
- CLI_TOP_CMD_TIMEOUT : CLI_DEFAULT_CMD_TIMEOUT;
+ if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) ||
+ (GLUSTER_CLI_HEAL_VOLUME == procnum))
+ timeout = CLI_TEN_MINUTES_TIMEOUT;
+ else
+ timeout = CLI_DEFAULT_CMD_TIMEOUT;
cli_cmd_lock ();
cmd_sent = 0;
diff --git a/cli/src/cli-cmd.h b/cli/src/cli-cmd.h
index ba877e2c4..041729276 100644
--- a/cli/src/cli-cmd.h
+++ b/cli/src/cli-cmd.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_CMD_H__
#define __CLI_CMD_H__
@@ -30,6 +20,19 @@
#include "cli.h"
#include "list.h"
+#define CLI_LOCAL_INIT(local, words, frame, dictionary) \
+ do { \
+ local = cli_local_get (); \
+ \
+ if (local) { \
+ local->words = words; \
+ if (dictionary) \
+ local->dict = dictionary; \
+ if (frame) \
+ frame->local = local; \
+ } \
+ } while (0)
+
#define CLI_STACK_DESTROY(_frame) \
do { \
if (_frame) { \
@@ -90,6 +93,8 @@ int cli_cmd_probe_register (struct cli_state *state);
int cli_cmd_system_register (struct cli_state *state);
+int cli_cmd_snapshot_register (struct cli_state *state);
+
int cli_cmd_misc_register (struct cli_state *state);
struct cli_cmd_word *cli_cmd_nextword (struct cli_cmd_word *word,
@@ -115,4 +120,5 @@ cli_cmd_submit (void *req, call_frame_t *frame,
gf_answer_t
cli_cmd_get_confirmation (struct cli_state *state, const char *question);
int cli_cmd_sent_status_get (int *status);
+
#endif /* __CLI_CMD_H__ */
diff --git a/cli/src/cli-mem-types.h b/cli/src/cli-mem-types.h
index 3c49d2183..09fcb639b 100644
--- a/cli/src/cli-mem-types.h
+++ b/cli/src/cli-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_MEM_TYPES_H__
#define __CLI_MEM_TYPES_H__
diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c
index f9bf7c819..ade1c8ebb 100644
--- a/cli/src/cli-rl.c
+++ b/cli/src/cli-rl.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -226,8 +216,7 @@ cli_rl_tokenize (const char *text)
}
out:
- if (copy)
- free (copy);
+ free (copy);
if (i < count) {
cli_cmd_tokens_destroy (tokens);
@@ -374,9 +363,10 @@ cli_rl_input (void *_data)
for (;;) {
line = readline (state->prompt);
if (!line)
- break;
+ exit(0); //break;
- cli_rl_process_line (line);
+ if (*line)
+ cli_rl_process_line (line);
free (line);
}
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 2ab13261a..bfeb854ad 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -1,32 +1,18 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
#endif
-#ifndef GSYNC_CONF
-#define GSYNC_CONF GEOREP"/gsyncd.conf"
-#endif
-
/* Widths of various columns in top read/write-perf output
* Total width of top read/write-perf should be 80 chars
* including one space between column
@@ -36,6 +22,8 @@
#define VOL_TOP_PERF_SPEED_WIDTH 4
#define VOL_TOP_PERF_TIME_WIDTH 26
+#define INDENT_MAIN_HEAD "%-25s %s "
+
#include "cli.h"
#include "compat-errno.h"
#include "cli-cmd.h"
@@ -58,25 +46,39 @@ extern rpc_clnt_prog_t *cli_rpc_prog;
extern int cli_op_ret;
extern int connected;
-char *cli_volume_type[] = {"Distribute",
- "Stripe",
- "Replicate",
- "Striped-Replicate",
- "Distributed-Stripe",
- "Distributed-Replicate",
- "Distributed-Striped-Replicate",
-};
-
-
-char *cli_volume_status[] = {"Created",
- "Started",
- "Stopped"
+char *cli_vol_type_str[] = {"Distribute",
+ "Stripe",
+ "Replicate",
+ "Striped-Replicate",
+ "Distributed-Stripe",
+ "Distributed-Replicate",
+ "Distributed-Striped-Replicate",
+ };
+
+char *cli_vol_status_str[] = {"Created",
+ "Started",
+ "Stopped",
+ };
+
+char *cli_vol_task_status_str[] = {"not started",
+ "in progress",
+ "stopped",
+ "completed",
+ "failed",
+ "fix-layout in progress",
+ "fix-layout stopped",
+ "fix-layout completed",
+ "fix-layout failed",
};
int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_get_volume (call_frame_t *frame, xlator_t *this,
void *data);
+int
+cli_to_glusterd (gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
+ rpc_clnt_prog_t *prog, struct iobref *iobref);
rpc_clnt_prog_t cli_handshake_prog = {
.progname = "cli handshake",
@@ -91,10 +93,10 @@ rpc_clnt_prog_t cli_pmap_prog = {
};
int
-gf_cli3_1_probe_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_probe_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_probe_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
int ret = -1;
char msg[1024] = {0,};
@@ -102,98 +104,38 @@ gf_cli3_1_probe_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_probe_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
//rsp.op_ret = -1;
//rsp.op_errno = EINVAL;
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
- if (!rsp.op_ret) {
- switch (rsp.op_errno) {
- case GF_PROBE_SUCCESS:
- snprintf (msg, sizeof (msg),
- "Probe successful");
- break;
- case GF_PROBE_LOCALHOST:
- snprintf (msg, sizeof (msg),
- "Probe on localhost not needed");
- break;
- case GF_PROBE_FRIEND:
- snprintf (msg, sizeof (msg),
- "Probe on host %s port %d already"
- " in peer list", rsp.hostname,
- rsp.port);
- break;
- default:
- snprintf (msg, sizeof (msg),
- "Probe returned with unknown errno %d",
- rsp.op_errno);
- break;
- }
- }
-
- if (rsp.op_ret) {
- if (rsp.op_errstr && (strlen (rsp.op_errstr) > 0)) {
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- } else {
- switch (rsp.op_errno) {
- case GF_PROBE_ANOTHER_CLUSTER:
- snprintf (msg, sizeof (msg),
- "%s is already part of "
- "another cluster",
- rsp.hostname);
- break;
- case GF_PROBE_VOLUME_CONFLICT:
- snprintf (msg, sizeof (msg),
- "Atleast one volume on %s "
- "conflicts with existing "
- "volumes in the cluster",
- rsp.hostname);
- break;
- case GF_PROBE_UNKNOWN_PEER:
- snprintf (msg, sizeof (msg),
- "%s responded with 'unknown "
- "peer' error, this could "
- "happen if %s doesn't have "
- "localhost in its peer "
- "database", rsp.hostname,
- rsp.hostname);
- break;
- case GF_PROBE_ADD_FAILED:
- snprintf (msg, sizeof (msg),
- "Failed to add peer "
- "information on %s" ,
- rsp.hostname);
- break;
- default:
- snprintf (msg, sizeof (msg),
- "Probe unsuccessful\nProbe "
- "returned with unknown errno "
- "%d", rsp.op_errno);
- break;
- }
- }
- gf_log ("cli", GF_LOG_ERROR, "%s", msg);
+ if (rsp.op_errstr && (strlen (rsp.op_errstr) > 0)) {
+ snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ if (rsp.op_ret)
+ gf_log ("cli", GF_LOG_ERROR, "%s", msg);
}
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("peerProbe", msg, rsp.op_ret,
- rsp.op_errno, NULL);
+ ret = cli_xml_output_str (NULL,
+ (rsp.op_ret)? NULL : msg,
+ rsp.op_ret, rsp.op_errno,
+ (rsp.op_ret)? msg : NULL);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
+
if (!rsp.op_ret)
- cli_out ("%s", msg);
+ cli_out ("peer probe: success. %s", msg);
else
- cli_err ("%s", msg);
+ cli_err ("peer probe: failed: %s", msg);
ret = rsp.op_ret;
@@ -203,82 +145,52 @@ out:
}
int
-gf_cli3_1_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
- gf1_cli_deprobe_rsp rsp = {0,};
+ gf_cli_rsp rsp = {0,};
int ret = -1;
- char msg[1024] = {0,};
+ char msg[1024] = {0,};
if (-1 == req->rpc_status) {
goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_deprobe_rsp);
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
//rsp.op_ret = -1;
//rsp.op_errno = EINVAL;
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to deprobe");
+
if (rsp.op_ret) {
if (strlen (rsp.op_errstr) > 0) {
snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
gf_log ("cli", GF_LOG_ERROR, "%s", rsp.op_errstr);
- } else {
- switch (rsp.op_errno) {
- case GF_DEPROBE_LOCALHOST:
- snprintf (msg, sizeof (msg),
- "%s is localhost",
- rsp.hostname);
- break;
- case GF_DEPROBE_NOT_FRIEND:
- snprintf (msg, sizeof (msg),
- "%s is not part of cluster",
- rsp.hostname);
- break;
- case GF_DEPROBE_BRICK_EXIST:
- snprintf (msg, sizeof (msg),
- "Brick(s) with the peer %s "
- "exist in cluster",
- rsp.hostname);
- break;
- case GF_DEPROBE_FRIEND_DOWN:
- snprintf (msg, sizeof (msg),
- "One of the peers is probably"
- " down. Check with 'peer "
- "status'.");
- break;
- default:
- snprintf (msg, sizeof (msg),
- "Detach unsuccessful\nDetach"
- " returned with unknown "
- "errno %d", rsp.op_errno);
- break;
- }
- gf_log ("cli", GF_LOG_ERROR,"Detach failed with op_ret "
- "%d and op_errno %d", rsp.op_ret, rsp.op_errno);
}
} else {
- snprintf (msg, sizeof (msg), "Detach successful");
+ snprintf (msg, sizeof (msg), "success");
}
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("peerDetach", msg, rsp.op_ret,
- rsp.op_errno, NULL);
+ ret = cli_xml_output_str (NULL,
+ (rsp.op_ret)? NULL : msg,
+ rsp.op_ret, rsp.op_errno,
+ (rsp.op_ret)? msg : NULL);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
+
if (!rsp.op_ret)
- cli_out ("%s", msg);
+ cli_out ("peer detach: %s", msg);
else
- cli_err ("%s", msg);
+ cli_err ("peer detach: failed: %s", msg);
ret = rsp.op_ret;
@@ -288,34 +200,143 @@ out:
}
int
-gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+gf_cli_output_peer_status (dict_t *dict, int count)
{
- gf1_cli_peer_list_rsp rsp = {0,};
int ret = -1;
- dict_t *dict = NULL;
char *uuid_buf = NULL;
char *hostname_buf = NULL;
int32_t i = 1;
char key[256] = {0,};
char *state = NULL;
- int32_t port = 0;
int32_t connected = 0;
char *connected_str = NULL;
+ cli_out ("Number of Peers: %d", count);
+ i = 1;
+ while ( i <= count) {
+ snprintf (key, 256, "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+
+ snprintf (key, 256, "friend%d.state", i);
+ ret = dict_get_str (dict, key, &state);
+ if (ret)
+ goto out;
+
+ cli_out ("\nHostname: %s\nUuid: %s\nState: %s (%s)",
+ hostname_buf, uuid_buf, state, connected_str);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+gf_cli_output_pool_list (dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t i = 1;
+ char key[256] = {0,};
+ int32_t connected = 0;
+ char *connected_str = NULL;
+
+ if (count >= 1)
+ cli_out ("UUID\t\t\t\t\tHostname\tState");
+
+ while ( i <= count) {
+ snprintf (key, 256, "friend%d.uuid", i);
+ ret = dict_get_str (dict, key, &uuid_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.hostname", i);
+ ret = dict_get_str (dict, key, &hostname_buf);
+ if (ret)
+ goto out;
+
+ snprintf (key, 256, "friend%d.connected", i);
+ ret = dict_get_int32 (dict, key, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+ cli_out ("%s\t%-9s\t%s ", uuid_buf, hostname_buf,
+ connected_str);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* function pointer for gf_cli_output_{pool_list,peer_status} */
+typedef int (*cli_friend_output_fn) (dict_t*, int);
+
+int
+gf_cli_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf1_cli_peer_list_rsp rsp = {0,};
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {0,};
+ char *cmd = NULL;
+ cli_friend_output_fn friend_output_fn;
+ call_frame_t *frame = NULL;
+ unsigned long flags = 0;
+
+ frame = myframe;
+ flags = (long)frame->local;
+
+ if (flags == GF_CLI_LIST_POOL_NODES) {
+ cmd = "pool list";
+ friend_output_fn = &gf_cli_output_pool_list;
+ } else {
+ cmd = "peer status";
+ friend_output_fn = &gf_cli_output_peer_status;
+ }
+
+ /* 'free' the flags set by gf_cli_list_friends */
+ frame->local = NULL;
+
if (-1 == req->rpc_status) {
goto out;
}
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
//rsp.op_ret = -1;
//rsp.op_errno = EINVAL;
goto out;
}
- gf_log ("cli", GF_LOG_INFO, "Received resp to list: %d",
+ gf_log ("cli", GF_LOG_DEBUG, "Received resp to list: %d",
rsp.op_ret);
ret = rsp.op_ret;
@@ -323,7 +344,19 @@ gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
if (!rsp.op_ret) {
if (!rsp.friends.friends_len) {
- cli_out ("No peers present");
+ snprintf (msg, sizeof (msg),
+ "%s: No peers present", cmd);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ cli_err ("%s", msg);
ret = 0;
goto out;
}
@@ -345,69 +378,34 @@ gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("peerStatus", dict,
- rsp.op_ret, rsp.op_errno,
- NULL);
+ ret = cli_xml_output_peer_status (dict, rsp.op_ret,
+ rsp.op_errno, msg);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
- ret = dict_get_int32 (dict, "count", &count);
+ ret = dict_get_int32 (dict, "count", &count);
if (ret) {
goto out;
}
- cli_out ("Number of Peers: %d", count);
-
- while ( i <= count) {
- snprintf (key, 256, "friend%d.uuid", i);
- ret = dict_get_str (dict, key, &uuid_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.connected", i);
- ret = dict_get_int32 (dict, key, &connected);
- if (ret)
- goto out;
- if (connected)
- connected_str = "Connected";
- else
- connected_str = "Disconnected";
-
- snprintf (key, 256, "friend%d.port", i);
- ret = dict_get_int32 (dict, key, &port);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.state", i);
- ret = dict_get_str (dict, key, &state);
- if (ret)
- goto out;
-
- if (!port) {
- cli_out ("\nHostname: %s\nUuid: %s\nState: %s "
- "(%s)",
- hostname_buf, uuid_buf, state,
- connected_str);
- } else {
- cli_out ("\nHostname: %s\nPort: %d\nUuid: %s\n"
- "State: %s (%s)", hostname_buf, port,
- uuid_buf, state, connected_str);
- }
- i++;
+ ret = friend_output_fn (dict, count);
+ if (ret) {
+ goto out;
}
} else {
- ret = -1;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status (dict, rsp.op_ret,
+ rsp.op_errno, NULL);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ } else {
+ ret = -1;
+ }
goto out;
}
@@ -417,7 +415,7 @@ gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
out:
cli_cmd_broadcast_response (ret);
if (ret)
- cli_err ("Peer status unsuccessful");
+ cli_err ("%s: failed", cmd);
if (dict)
dict_destroy (dict);
@@ -451,14 +449,37 @@ cli_out_options ( char *substr, char *optstr, char *valstr)
cli_out ("%s: %s",ptr2 , valstr);
}
+static int
+_gf_cli_output_volinfo_opts (dict_t *d, char *k,
+ data_t *v, void *tmp)
+{
+ int ret = 0;
+ char *key = NULL;
+ char *ptr = NULL;
+ data_t *value = NULL;
+
+ key = tmp;
+
+ ptr = strstr (k, "option.");
+ if (ptr) {
+ value = v;
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ cli_out_options (key, k, v->data);
+ }
+out:
+ return ret;
+}
+
int
-gf_cli3_1_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
int ret = -1;
int opt_count = 0;
- int k = 0;
int32_t i = 0;
int32_t j = 1;
int32_t status = 0;
@@ -469,24 +490,27 @@ gf_cli3_1_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
int32_t replica_count = 0;
int32_t vol_type = 0;
int32_t transport = 0;
- char *ptr = NULL;
char *volume_id_str = NULL;
char *brick = NULL;
char *volname = NULL;
dict_t *dict = NULL;
- data_pair_t *pairs = NULL;
- data_t *value = NULL;
cli_local_t *local = NULL;
char key[1024] = {0};
char err_str[2048] = {0};
gf_cli_rsp rsp = {0};
+ char *caps = NULL;
+ int k __attribute__((unused)) = 0;
+ // snap_volume variable helps in showing whether a volume is a normal
+ //volume or a volume for the snapshot
+ int32_t snap_volume = 0;
if (-1 == req->rpc_status)
goto out;
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -499,11 +523,9 @@ gf_cli3_1_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
}
if (!rsp.dict.dict_len) {
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML)
goto xml_output;
-#endif
- cli_out ("No volumes present");
+ cli_err ("No volumes present");
ret = 0;
goto out;
}
@@ -546,20 +568,15 @@ gf_cli3_1_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Volume %s does not exist",
local->get_vol.volname);
ret = -1;
-#if (HAVE_LIB_XML)
if (!(global_state->mode & GLUSTER_MODE_XML))
-#endif
- {
goto out;
- }
}
}
-#if (HAVE_LIB_XML)
xml_output:
if (global_state->mode & GLUSTER_MODE_XML) {
/* For GET_NEXT_VOLUME output is already begun in
- * and will also end in gf_cli3_1_get_next_volume()
+ * and will also end in gf_cli_get_next_volume()
*/
if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
ret = cli_xml_output_vol_info_begin
@@ -589,7 +606,6 @@ xml_output:
}
goto out;
}
-#endif
while ( i < count) {
cli_out (" ");
@@ -608,6 +624,11 @@ xml_output:
if (ret)
goto out;
+ snprintf (key, sizeof (key), "volume%d.snap_volume", i);
+ ret = dict_get_int32 (dict, key, &snap_volume);
+ if (ret)
+ goto out;
+
snprintf (key, 256, "volume%d.brick_count", i);
ret = dict_get_int32 (dict, key, &brick_count);
if (ret)
@@ -645,9 +666,47 @@ xml_output:
vol_type = type + 3;
cli_out ("Volume Name: %s", volname);
- cli_out ("Type: %s", cli_volume_type[vol_type]);
+ cli_out ("Type: %s", cli_vol_type_str[vol_type]);
cli_out ("Volume ID: %s", volume_id_str);
- cli_out ("Status: %s", cli_volume_status[status]);
+ cli_out ("Status: %s", cli_vol_status_str[status]);
+ if (snap_volume)
+ cli_out ("Snap Volume: %s", "yes");
+ else
+ cli_out ("Snap Volume: %s", "no");
+
+#ifdef HAVE_BD_XLATOR
+ k = 0;
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.xlator%d", i, k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ goto next;
+ do {
+ j = 0;
+ cli_out ("Xlator %d: %s", k + 1, caps);
+ do {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d.caps%d",
+ i, k, j++);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ cli_out ("Capability %d: %s", j, caps);
+ } while (1);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d", i, ++k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ } while (1);
+
+next:
+#else
+ caps = 0; /* Avoid compiler warnings when BD not enabled */
+#endif
if (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
cli_out ("Number of Bricks: %d x %d x %d = %d",
@@ -655,10 +714,8 @@ xml_output:
stripe_count,
replica_count,
brick_count);
-
} else if (type == GF_CLUSTER_TYPE_NONE) {
cli_out ("Number of Bricks: %d", brick_count);
-
} else {
/* For both replicate and stripe, dist_count is
good enough */
@@ -686,15 +743,15 @@ xml_output:
goto out;
cli_out ("Brick%d: %s", j, brick);
+#ifdef HAVE_BD_XLATOR
+ snprintf (key, 256, "volume%d.vg%d", i, j);
+ ret = dict_get_str (dict, key, &caps);
+ if (!ret)
+ cli_out ("Brick%d VG: %s", j, caps);
+#endif
j++;
}
- pairs = dict->members_list;
- if (!pairs) {
- ret = -1;
- goto out;
- }
-
snprintf (key, 256, "volume%d.opt_count",i);
ret = dict_get_int32 (dict, key, &opt_count);
if (ret)
@@ -704,26 +761,12 @@ xml_output:
goto out;
cli_out ("Options Reconfigured:");
- k = 0;
- while (k < opt_count) {
-
- snprintf (key, 256, "volume%d.option.",i);
- while (pairs) {
- ptr = strstr (pairs->key, "option.");
- if (ptr) {
- value = pairs->value;
- if (!value) {
- ret = -1;
- goto out;
- }
- cli_out_options (key, pairs->key,
- value->data);
- }
- pairs = pairs->next;
- }
- k++;
- }
+ snprintf (key, 256, "volume%d.option.",i);
+
+ ret = dict_foreach (dict, _gf_cli_output_volinfo_opts, key);
+ if (ret)
+ goto out;
i++;
}
@@ -738,18 +781,16 @@ out:
if (dict)
dict_destroy (dict);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.op_errstr);
gf_log ("cli", GF_LOG_INFO, "Returning: %d", ret);
return ret;
}
int
-gf_cli3_1_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -757,61 +798,70 @@ gf_cli3_1_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
cli_local_t *local = NULL;
char *volname = NULL;
dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
local = ((call_frame_t *) (myframe))->local;
- ((call_frame_t *) (myframe))->local = NULL;
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
+ gf_log ("cli", GF_LOG_INFO, "Received resp to create volume");
+
dict = local->dict;
ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
- gf_log ("cli", GF_LOG_INFO, "Received resp to create volume");
-
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volCreate", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_vol_create (rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+ cli_err ("volume create: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume create: %s: failed", volname);
else
- cli_out ("Creation of volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful":
- "successful. Please start the volume to "
- "access data.");
+ cli_out ("volume create: %s: success: "
+ "please start the volume to access data", volname);
+
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (dict)
- dict_unref (dict);
- if (local)
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
return ret;
}
int
-gf_cli3_1_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -820,47 +870,197 @@ gf_cli3_1_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
char *volname = NULL;
call_frame_t *frame = NULL;
dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
local = frame->local;
- frame->local = NULL;
if (local)
dict = local->dict;
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (frame->this->name, GF_LOG_ERROR,
"dict get failed");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to delete volume");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volDelete", dict, rsp.op_ret,
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volDelete", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ cli_err ("volume delete: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume delete: %s: failed", volname);
+ else
+ cli_out ("volume delete: %s: success", volname);
+
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ free (rsp.dict.dict_val);
+
+ gf_log ("", GF_LOG_INFO, "Returning with %d", ret);
+ return ret;
+}
+
+int
+gf_cli3_1_uuid_get_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ char *uuid_str = NULL;
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+ frame->local = NULL;
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to uuid get");
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len,
+ &dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to unserialize "
+ "response for uuid get");
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "uuid", &uuid_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get uuid "
+ "from dictionary");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict ("uuidGenerate", dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, "") == 0)
+ cli_err ("Get uuid was unsuccessful");
+ else
+ cli_err ("%s", rsp.op_errstr);
+
+ } else {
+ cli_out ("UUID: %s", uuid_str);
+
+ }
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response (ret);
+ cli_local_wipe (local);
+ if (rsp.dict.dict_val)
+ free (rsp.dict.dict_val);
+ if (dict)
+ dict_unref (dict);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+int
+gf_cli3_1_uuid_reset_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {0,};
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ local = frame->local;
+ frame->local = NULL;
+
+ gf_log ("cli", GF_LOG_INFO, "Received resp to uuid reset");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict ("uuidReset", dict, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
cli_err ("%s", rsp.op_errstr);
else
- cli_out ("Deleting volume %s has been %s", volname,
+ cli_out ("resetting the peer uuid has been %s",
(rsp.op_ret) ? "unsuccessful": "successful");
ret = rsp.op_ret;
@@ -877,7 +1077,7 @@ out:
}
int
-gf_cli3_1_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -886,69 +1086,76 @@ gf_cli3_1_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
char *volname = NULL;
call_frame_t *frame = NULL;
dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
- if (frame) {
+ if (frame)
local = frame->local;
- frame->local = NULL;
- }
if (local)
dict = local->dict;
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "dict get failed");
+ gf_log (frame->this->name, GF_LOG_ERROR, "dict get failed");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to start volume");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volStart", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volStart", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+ cli_err ("volume start: %s: failed: %s", volname,
+ rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume start: %s: failed", volname);
else
- cli_out ("Starting volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ cli_out ("volume start: %s: success", volname);
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (dict)
- dict_unref (dict);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
return ret;
}
int
-gf_cli3_1_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -957,29 +1164,29 @@ gf_cli3_1_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
char *volname = NULL;
call_frame_t *frame = NULL;
dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
- if (frame) {
+ if (frame)
local = frame->local;
- frame->local = NULL;
- }
if (local) {
dict = local->dict;
ret = dict_get_str (dict, "volname", &volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (frame->this->name, GF_LOG_ERROR,
"Unable to get volname from dict");
goto out;
}
@@ -987,38 +1194,47 @@ gf_cli3_1_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
gf_log ("cli", GF_LOG_INFO, "Received resp to stop volume");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volStop", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new ();
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed rsp_dict unserialization");
+ goto out;
+ }
+ }
+
+ ret = cli_xml_output_generic_volume ("volStop", rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+ cli_err ("volume stop: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err ("volume stop: %s: failed", volname);
else
- cli_out ("Stopping volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ cli_out ("volume stop: %s: success", volname);
+
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (local)
- cli_local_wipe (local);
+ free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
return ret;
}
int
-gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1036,44 +1252,45 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
char msg[1024] = {0,};
gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
int32_t counter = 0;
- char *node_uuid = NULL;
+ char *node_name = NULL;
char key[256] = {0,};
int32_t i = 1;
uint64_t failures = 0;
+ uint64_t skipped = 0;
double elapsed = 0;
+ char *size_str = NULL;
+ char *task_id_str = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
- if (frame) {
+ if (frame)
local = frame->local;
- frame->local = NULL;
- }
- if (local) {
+ if (local)
local_dict = local->dict;
- }
ret = dict_get_str (local_dict, "volname", &volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (frame->this->name, GF_LOG_ERROR,
"Failed to get volname");
goto out;
}
ret = dict_get_int32 (local_dict, "rebalance-command", (int32_t*)&cmd);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (frame->this->name, GF_LOG_ERROR,
"Failed to get command");
goto out;
}
@@ -1093,24 +1310,27 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
}
}
- if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS))) {
- /* All other possibility is about starting a volume */
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ /* All other possibilities are about starting a rebalance */
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (rsp.op_ret && strcmp (rsp.op_errstr, "")) {
snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg),
- "Starting rebalance on volume %s has been %s",
- volname, (rsp.op_ret) ? "unsuccessful":
- "successful");
+ } else {
+ if (!rsp.op_ret) {
+ snprintf (msg, sizeof (msg),
+ "Starting rebalance on volume %s has "
+ "been successful.\nID: %s", volname,
+ task_id_str);
+ } else {
+ snprintf (msg, sizeof (msg),
+ "Starting rebalance on volume %s has "
+ "been unsuccessful.", volname);
+ }
+ }
goto done;
}
- ret = dict_get_int32 (dict, "count", &counter);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "count not set");
- goto out;
- }
-
if (cmd == GF_DEFRAG_CMD_STOP) {
if (rsp.op_ret == -1) {
if (strcmp (rsp.op_errstr, ""))
@@ -1123,8 +1343,12 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto done;
} else {
snprintf (msg, sizeof (msg),
- "Stopped rebalance process on volume %s \n",
- volname);
+ "rebalance process may be in the middle of a "
+ "file migration.\nThe process will be fully "
+ "stopped once the migration of the file is "
+ "complete.\nPlease check rebalance process "
+ "for completion before doing any further "
+ "brick related tasks on the volume.");
}
}
if (cmd == GF_DEFRAG_CMD_STATUS) {
@@ -1139,119 +1363,118 @@ gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto done;
}
}
- cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files",
- "size", "scanned", "failures", "status", "run time in secs");
- cli_out ("%40s %16s %13s %13s %13s %14s %14s", "---------",
- "-----------", "-----------", "-----------", "-----------",
- "------------", "-----------");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_rebalance (cmd, dict, rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "count", &counter);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "count not set");
+ goto out;
+ }
+
+ cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %20s %18s", "---------",
+ "-----------", "-----------", "-----------", "-----------",
+ "-----------", "------------", "--------------");
do {
- snprintf (key, 256, "node-uuid-%d", i);
- ret = dict_get_str (dict, key, &node_uuid);
+ snprintf (key, 256, "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get node-uuid");
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get node-name");
memset (key, 0, 256);
snprintf (key, 256, "files-%d", i);
ret = dict_get_uint64 (dict, key, &files);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get file count");
memset (key, 0, 256);
snprintf (key, 256, "size-%d", i);
ret = dict_get_uint64 (dict, key, &size);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get size of xfer");
memset (key, 0, 256);
snprintf (key, 256, "lookups-%d", i);
ret = dict_get_uint64 (dict, key, &lookup);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get lookedup file count");
memset (key, 0, 256);
snprintf (key, 256, "status-%d", i);
ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get status");
memset (key, 0, 256);
snprintf (key, 256, "failures-%d", i);
ret = dict_get_uint64 (dict, key, &failures);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get failures count");
memset (key, 0, 256);
+ snprintf (key, 256, "skipped-%d", i);
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get skipped count");
+ memset (key, 0, 256);
snprintf (key, 256, "run-time-%d", i);
ret = dict_get_double (dict, key, &elapsed);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get run-time");
- switch (status_rcd) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
- case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- }
- cli_out ("%40s %16"PRId64 "%13"PRId64 "%13"PRId64 "%13"PRId64
- " %14s %14.2f", node_uuid, files, size, lookup,
- failures, status, elapsed);
+ status = cli_vol_task_status_str[status_rcd];
+ size_str = gf_uint64_2human_readable(size);
+ cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"PRIu64
+ " %13"PRIu64 " %20s %18.2f", node_name, files,
+ size_str, lookup, failures, skipped, status, elapsed);
+ GF_FREE(size_str);
+
i++;
} while (i <= counter);
done:
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRebalance", msg, rsp.op_ret,
- status_rcd, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ if (global_state->mode & GLUSTER_MODE_XML)
+ cli_xml_output_str ("volRebalance", msg,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ else {
+ if (rsp.op_ret)
+ cli_err ("volume rebalance: %s: failed: %s", volname,
+ msg);
+ else
+ cli_out ("volume rebalance: %s: success: %s", volname,
+ msg);
}
-#endif
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
ret = rsp.op_ret;
out:
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
+ free (rsp.op_errstr); //malloced by xdr
+ free (rsp.dict.dict_val); //malloced by xdr
if (dict)
dict_unref (dict);
- if (local_dict)
- dict_unref (local_dict);
- if (local)
- cli_local_wipe (local);
cli_cmd_broadcast_response (ret);
return ret;
}
int
-gf_cli3_1_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1264,7 +1487,8 @@ gf_cli3_1_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -1273,7 +1497,6 @@ gf_cli3_1_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
snprintf (msg, sizeof (msg), "Rename volume %s",
(rsp.op_ret) ? "unsuccessful": "successful");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volRename", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
@@ -1282,12 +1505,12 @@ gf_cli3_1_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret)
- cli_err ("%s", msg);
+ cli_err ("volume rename: failed");
else
- cli_out ("%s", msg);
+ cli_out ("volume rename: success");
+
ret = rsp.op_ret;
out:
@@ -1296,7 +1519,7 @@ out:
}
int
-gf_cli3_1_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1309,19 +1532,19 @@ gf_cli3_1_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to reset");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
+ if (strcmp (rsp.op_errstr, ""))
snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
else
snprintf (msg, sizeof (msg), "reset volume %s",
(rsp.op_ret) ? "unsuccessful": "successful");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volReset", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
@@ -1330,21 +1553,68 @@ gf_cli3_1_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret)
- cli_err ("%s", msg);
+ cli_err ("volume reset: failed: %s", msg);
else
- cli_out ("%s", msg);
+ cli_out ("volume reset: success: %s", msg);
+
ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
+ cli_cmd_broadcast_response (ret);
return ret;
}
+char *
+is_server_debug_xlator (void *myframe)
+{
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char **words = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *debug_xlator = NULL;
+
+ frame = myframe;
+ local = frame->local;
+ words = (char **)local->words;
+
+ while (*words != NULL) {
+ if (strstr (*words, "trace") == NULL &&
+ strstr (*words, "error-gen") == NULL) {
+ words++;
+ continue;
+ }
+
+ key = *words;
+ words++;
+ value = *words;
+ if (value == NULL)
+ break;
+ if (strstr (value, "client")) {
+ words++;
+ continue;
+ } else {
+ if (!(strstr (value, "posix") || strstr (value, "acl")
+ || strstr (value, "locks") ||
+ strstr (value, "io-threads") ||
+ strstr (value, "marker") ||
+ strstr (value, "index"))) {
+ words++;
+ continue;
+ } else {
+ debug_xlator = gf_strdup (key);
+ break;
+ }
+ }
+ }
+
+ return debug_xlator;
+}
+
int
-gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1352,6 +1622,8 @@ gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
dict_t *dict = NULL;
char *help_str = NULL;
char msg[1024] = {0,};
+ char *debug_xlator = _gf_false;
+ char tmp_str[512] = {0,};
if (-1 == req->rpc_status) {
goto out;
@@ -1359,7 +1631,8 @@ gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -1374,14 +1647,22 @@ gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (ret)
- goto out;
+ /* For brick processes graph change does not happen on the fly.
+ * The process has to be restarted. So this is a check from the
+ * volume set option such that if debug xlators such as trace/errorgen
+ * are provided in the set command, warn the user.
+ */
+ debug_xlator = is_server_debug_xlator (myframe);
if (dict_get_str (dict, "help-str", &help_str) && !msg[0])
snprintf (msg, sizeof (msg), "Set volume %s",
(rsp.op_ret) ? "unsuccessful": "successful");
+ if (rsp.op_ret == 0 && debug_xlator) {
+ snprintf (tmp_str, sizeof (tmp_str), "\n%s translator has been "
+ "added to the server volume file. Please restart the"
+ " volume for enabling the translator", debug_xlator);
+ }
-#if (HAVE_LIB_XML)
if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
ret = cli_xml_output_str ("volSet", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
@@ -1390,25 +1671,35 @@ gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
-
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", ((help_str == NULL) ? msg : help_str));
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, ""))
+ cli_err ("volume set: failed: %s", rsp.op_errstr);
+ else
+ cli_err ("volume set: failed");
+ } else {
+ if (help_str == NULL) {
+ if (debug_xlator == NULL)
+ cli_out ("volume set: success");
+ else
+ cli_out ("volume set: success%s", tmp_str);
+ }else {
+ cli_out ("%s", help_str);
+ }
+ }
ret = rsp.op_ret;
out:
+ if (dict)
+ dict_unref (dict);
+ GF_FREE (debug_xlator);
cli_cmd_broadcast_response (ret);
return ret;
}
int
-gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1421,7 +1712,8 @@ gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -1434,7 +1726,6 @@ gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
snprintf (msg, sizeof (msg), "Add Brick %s",
(rsp.op_ret) ? "unsuccessful": "successful");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volAddBrick", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
@@ -1443,20 +1734,17 @@ gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret)
- cli_err ("%s", msg);
+ cli_err ("volume add-brick: failed: %s", rsp.op_errstr);
else
- cli_out ("%s", msg);
+ cli_out ("volume add-brick: success");
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
return ret;
}
@@ -1471,34 +1759,67 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
uint64_t size = 0;
uint64_t lookup = 0;
dict_t *dict = NULL;
- //char msg[1024] = {0,};
+ char msg[1024] = {0,};
char key[256] = {0,};
int32_t i = 1;
int32_t counter = 0;
- char *node_uuid = 0;
+ char *node_name = 0;
gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
uint64_t failures = 0;
+ uint64_t skipped = 0;
double elapsed = 0;
-
+ char *size_str = NULL;
+ int32_t command = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *cmd_str = "unknown";
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (frame)
+ local = frame->local;
+ ret = dict_get_int32 (local->dict, "command", &command);
+ if (ret)
goto out;
+ cmd = command;
+
+ switch (cmd) {
+ case GF_OP_CMD_STOP:
+ cmd_str = "stop";
+ break;
+ case GF_OP_CMD_STATUS:
+ cmd_str = "status";
+ break;
+ default:
+ break;
}
ret = rsp.op_ret;
if (rsp.op_ret == -1) {
if (strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg), "volume remove-brick %s: "
+ "failed: %s", cmd_str, rsp.op_errstr);
else
- cli_err ("failed to get the status of "
- "remove-brick process");
+ snprintf (msg, sizeof (msg), "volume remove-brick %s: "
+ "failed", cmd_str);
+
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+
+ cli_err ("%s", msg);
goto out;
}
@@ -1510,72 +1831,100 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
rsp.dict.dict_len,
&dict);
if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
+ strncpy (msg, "failed to unserialize req-buffer to "
+ "dictionary", sizeof (msg));
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ rsp.op_ret = -1;
+ goto xml_output;
+ }
+
+ gf_log ("cli", GF_LOG_ERROR, "%s", msg);
goto out;
}
}
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (strcmp (rsp.op_errstr, "")) {
+ ret = cli_xml_output_vol_remove_brick (_gf_true, dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ } else {
+ ret = cli_xml_output_vol_remove_brick (_gf_true, dict,
+ rsp.op_ret,
+ rsp.op_errno,
+ msg);
+ }
+ goto out;
+ }
+
ret = dict_get_int32 (dict, "count", &counter);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "count not set");
+ gf_log (frame->this->name, GF_LOG_ERROR, "count not set");
goto out;
}
- cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files",
- "size", "scanned", "failures", "status", "run-time in secs");
- cli_out ("%40s %16s %13s %13s %13s %14s %14s", "---------",
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status", "run-time in secs");
+ cli_out ("%40s %16s %13s %13s %13s %13s %14s %16s", "---------",
"-----------", "-----------", "-----------", "-----------",
- "------------", "------------");
+ "-----------","------------", "--------------");
do {
- snprintf (key, 256, "node-uuid-%d", i);
- ret = dict_get_str (dict, key, &node_uuid);
+ snprintf (key, 256, "node-name-%d", i);
+ ret = dict_get_str (dict, key, &node_name);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get node-uuid");
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "failed to get node-name");
memset (key, 0, 256);
snprintf (key, 256, "files-%d", i);
ret = dict_get_uint64 (dict, key, &files);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get file count");
memset (key, 0, 256);
snprintf (key, 256, "size-%d", i);
ret = dict_get_uint64 (dict, key, &size);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get size of xfer");
memset (key, 0, 256);
snprintf (key, 256, "lookups-%d", i);
ret = dict_get_uint64 (dict, key, &lookup);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get lookedup file count");
memset (key, 0, 256);
snprintf (key, 256, "status-%d", i);
ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get status");
snprintf (key, 256, "failures-%d", i);
ret = dict_get_uint64 (dict, key, &failures);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"Failed to get failure on files");
+ snprintf (key, 256, "failures-%d", i);
+ ret = dict_get_uint64 (dict, key, &skipped);
+ if (ret)
+ gf_log (frame->this->name, GF_LOG_TRACE,
+ "Failed to get skipped files");
memset (key, 0, 256);
snprintf (key, 256, "run-time-%d", i);
ret = dict_get_double (dict, key, &elapsed);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"Failed to get run-time");
switch (status_rcd) {
@@ -1594,31 +1943,34 @@ gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
case GF_DEFRAG_STATUS_FAILED:
status = "failed";
break;
+ default:
+ break;
}
- cli_out ("%40s %16"PRId64 "%13"PRId64 "%13"PRId64 "%13"PRId64
- " %14s %14.2f", node_uuid, files, size, lookup,
- failures, status, elapsed);
+
+ size_str = gf_uint64_2human_readable(size);
+
+ if (strcmp (status, "not started")) {
+ cli_out ("%40s %16"PRIu64 " %13s" " %13"PRIu64 " %13"
+ PRIu64 " %13"PRIu64 " %14s %16.2f", node_name,
+ files, size_str, lookup, failures, skipped,
+ status, elapsed);
+ }
+ GF_FREE(size_str);
+
i++;
} while (i <= counter);
- //TODO: Do proper xml output
- /*
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRemoveBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ if ((cmd == GF_OP_CMD_STOP) && (rsp.op_ret == 0)) {
+ cli_out ("'remove-brick' process may be in the middle of a "
+ "file migration.\nThe process will be fully stopped "
+ "once the migration of the file is complete.\nPlease "
+ "check remove-brick process for completion before "
+ "doing any further brick related tasks on the "
+ "volume.");
}
-#endif
- cli_out ("%s", msg);
- */
out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
+ free (rsp.dict.dict_val); //malloced by xdr
if (dict)
dict_unref (dict);
cli_cmd_broadcast_response (ret);
@@ -1627,7 +1979,7 @@ out:
int
-gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1637,6 +1989,8 @@ gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
char *cmd_str = "unknown";
cli_local_t *local = NULL;
call_frame_t *frame = NULL;
+ char *task_id_str = NULL;
+ dict_t *rsp_dict = NULL;
if (-1 == req->rpc_status) {
goto out;
@@ -1647,7 +2001,8 @@ gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -1657,10 +2012,31 @@ gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
- switch (cmd) {
+ if (rsp.dict.dict_len) {
+ rsp_dict = dict_new ();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to unserialize rsp_dict");
+ goto out;
+ }
+ }
+ switch (cmd) {
case GF_OP_CMD_START:
cmd_str = "start";
+
+ ret = dict_get_str (rsp_dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "remove-brick-id is not present in dict");
+ }
break;
case GF_OP_CMD_COMMIT:
cmd_str = "commit";
@@ -1681,36 +2057,31 @@ gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
snprintf (msg, sizeof (msg), "Remove Brick %s %s", cmd_str,
(rsp.op_ret) ? "unsuccessful": "successful");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRemoveBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
+ ret = cli_xml_output_vol_remove_brick (_gf_false, rsp_dict,
+ rsp.op_ret, rsp.op_errno,
+ msg);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
-
-out:
- if (frame)
- frame->local = NULL;
- if (local) {
- dict_unref (local->dict);
- cli_local_wipe (local);
+ if (rsp.op_ret) {
+ cli_err ("volume remove-brick %s: failed: %s", cmd_str,
+ msg);
+ } else {
+ cli_out ("volume remove-brick %s: success", cmd_str);
+ if (GF_OP_CMD_START == cmd && task_id_str != NULL)
+ cli_out ("ID: %s", task_id_str);
}
+ ret = rsp.op_ret;
+
+out:
cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
return ret;
}
@@ -1718,7 +2089,7 @@ out:
int
-gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1732,7 +2103,8 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
gf1_cli_replace_op replace_op = 0;
char *rb_operation_str = NULL;
dict_t *rsp_dict = NULL;
- char msg[1024] = {0,};
+ char msg[1024] = {0,};
+ char *task_id_str = NULL;
if (-1 == req->rpc_status) {
goto out;
@@ -1742,7 +2114,8 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -1757,58 +2130,77 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new ();
+
+ ret = dict_unserialize (rsp.dict.dict_val,
+ rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret < 0) {
+ gf_log ("glusterd", GF_LOG_ERROR,
+ "failed to "
+ "unserialize rsp buffer to dictionary");
+ goto out;
+ }
+ }
+
switch (replace_op) {
case GF_REPLACE_OP_START:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick failed to start";
- else
- rb_operation_str = "replace-brick started successfully";
+ if (rsp.op_ret) {
+ rb_operation_str = gf_strdup ("replace-brick failed to"
+ " start");
+ } else {
+ ret = dict_get_str (rsp_dict, GF_REPLACE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get "
+ "\"replace-brick-id\" from dict");
+ goto out;
+ }
+ ret = gf_asprintf (&rb_operation_str,
+ "replace-brick started successfully"
+ "\nID: %s", task_id_str);
+ if (ret < 0)
+ goto out;
+ }
break;
case GF_REPLACE_OP_STATUS:
- if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick status unknown";
- else {
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- rsp_dict = dict_new ();
-
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &rsp_dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
+ if (rsp.op_ret || ret) {
+ rb_operation_str = gf_strdup ("replace-brick status "
+ "unknown");
+ } else {
ret = dict_get_str (rsp_dict, "status-reply",
&status_reply);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to"
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to"
"get status");
goto out;
}
- rb_operation_str = status_reply;
+ rb_operation_str = gf_strdup (status_reply);
}
break;
case GF_REPLACE_OP_PAUSE:
if (rsp.op_ret)
- rb_operation_str = "replace-brick pause failed";
+ rb_operation_str = gf_strdup ("replace-brick pause "
+ "failed");
else
- rb_operation_str = "replace-brick paused successfully";
+ rb_operation_str = gf_strdup ("replace-brick paused "
+ "successfully");
break;
case GF_REPLACE_OP_ABORT:
if (rsp.op_ret)
- rb_operation_str = "replace-brick abort failed";
+ rb_operation_str = gf_strdup ("replace-brick abort "
+ "failed");
else
- rb_operation_str = "replace-brick aborted successfully";
+ rb_operation_str = gf_strdup ("replace-brick aborted "
+ "successfully");
break;
case GF_REPLACE_OP_COMMIT:
@@ -1829,9 +2221,11 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick commit failed";
+ rb_operation_str = gf_strdup ("replace-brick commit "
+ "failed");
else
- rb_operation_str = "replace-brick commit successful";
+ rb_operation_str = gf_strdup ("replace-brick commit "
+ "successful");
break;
@@ -1842,28 +2236,27 @@ gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
}
if (rsp.op_ret && (strcmp (rsp.op_errstr, ""))) {
- rb_operation_str = rsp.op_errstr;
+ /* rb_operation_str is heap-owned now: release the
+ * per-operation string set in the switch above before
+ * replacing it with the server's error text */
+ GF_FREE (rb_operation_str);
+ rb_operation_str = gf_strdup (rsp.op_errstr);
}
gf_log ("cli", GF_LOG_INFO, "Received resp to replace brick");
- snprintf (msg,sizeof (msg), "%s",
+ snprintf (msg, sizeof (msg), "%s",
rb_operation_str ? rb_operation_str : "Unknown operation");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volReplaceBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
+ ret = cli_xml_output_vol_replace_brick (replace_op, rsp_dict,
+ rsp.op_ret,
+ rsp.op_errno, msg);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret)
- cli_err ("%s", msg);
+ cli_err ("volume replace-brick: failed: %s", msg);
else
- cli_out ("%s", msg);
+ cli_out ("volume replace-brick: success: %s", msg);
ret = rsp.op_ret;
out:
@@ -1875,9 +2268,11 @@ out:
cli_local_wipe (local);
}
+ if (rb_operation_str)
+ GF_FREE (rb_operation_str);
+
cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ free (rsp.dict.dict_val);
if (rsp_dict)
dict_unref (rsp_dict);
@@ -1886,7 +2281,7 @@ out:
static int
-gf_cli3_1_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1899,7 +2294,8 @@ gf_cli3_1_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -1911,7 +2307,6 @@ gf_cli3_1_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
snprintf (msg, sizeof (msg), "log rotate %s",
(rsp.op_ret) ? "unsuccessful": "successful");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volLogRotate", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
@@ -1920,24 +2315,22 @@ gf_cli3_1_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret)
- cli_err ("%s", msg);
+ cli_err ("volume log-rotate: failed: %s", msg);
else
- cli_out ("%s", msg);
+ cli_out ("volume log-rotate: success");
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ free (rsp.dict.dict_val);
return ret;
}
static int
-gf_cli3_1_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -1950,19 +2343,20 @@ gf_cli3_1_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received resp to sync");
if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
+ snprintf (msg, sizeof (msg), "volume sync: failed: %s",
+ rsp.op_errstr);
else
snprintf (msg, sizeof (msg), "volume sync: %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ (rsp.op_ret) ? "failed": "success");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volSync", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
@@ -1971,7 +2365,6 @@ gf_cli3_1_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret)
cli_err ("%s", msg);
@@ -1985,12 +2378,12 @@ out:
}
int32_t
-gf_cli3_1_print_limit_list (char *volname, char *limit_list,
+gf_cli_print_limit_list (char *volname, char *limit_list,
char *op_errstr)
{
int64_t size = 0;
int64_t limit_value = 0;
- int32_t i, j, k;
+ int32_t i, j;
int32_t len = 0, ret = -1;
char *size_str = NULL;
char path [PATH_MAX] = {0, };
@@ -1998,6 +2391,7 @@ gf_cli3_1_print_limit_list (char *volname, char *limit_list,
char value [1024] = {0, };
char mountdir [] = "/tmp/mntXXXXXX";
char abspath [PATH_MAX] = {0, };
+ char *colon_ptr = NULL;
runner_t runner = {0,};
GF_VALIDATE_OR_GOTO ("cli", volname, out);
@@ -2034,7 +2428,7 @@ gf_cli3_1_print_limit_list (char *volname, char *limit_list,
len = strlen (limit_list);
if (len == 0) {
- cli_out ("quota limit not set ");
+ cli_err ("quota limit not set ");
goto unmount;
}
@@ -2045,19 +2439,16 @@ gf_cli3_1_print_limit_list (char *volname, char *limit_list,
"-----------------------");
while (i < len) {
j = 0;
- k = 0;
-
- while (limit_list [i] != ':') {
- path [k++] = limit_list [i++];
- }
- path [k] = '\0';
-
- i++; //skip ':'
while (limit_list [i] != ',' && limit_list [i] != '\0') {
- value [j++] = limit_list[i++];
+ path [j++] = limit_list[i++];
}
- value [j] = '\0';
+ path [j] = '\0';
+ //here path[] holds one whole "<path>:<limit>" entry; it is split at
+ //the last ':' below.
+ //NOTE(review): strrchr() returns NULL when the entry has no ':' and
+ //the result is dereferenced unchecked; the copies into path[] and
+ //value[] are not length-bounded either -- confirm limit_list is
+ //always well-formed or add checks.
+
+ colon_ptr = strrchr (path, ':');
+ *colon_ptr = '\0';
+ strcpy (value, ++colon_ptr);
snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
@@ -2100,7 +2491,7 @@ out:
}
int
-gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_quota_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -2109,15 +2500,19 @@ gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
char *volname = NULL;
char *limit_list = NULL;
int32_t type = 0;
- char msg[1024] = {0,};
+ char msg[1024] = {0,};
+ call_frame_t *frame = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -2125,10 +2520,9 @@ gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
strcmp (rsp.op_errstr, "") == 0) {
snprintf (msg, sizeof (msg), "command unsuccessful %s",
rsp.op_errstr);
-#if (HAVE_LIB_XML)
+
if (global_state->mode & GLUSTER_MODE_XML)
goto xml_output;
-#endif
goto out;
}
@@ -2149,21 +2543,20 @@ gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_get_str (dict, "volname", &volname);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get volname");
ret = dict_get_str (dict, "limit_list", &limit_list);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get limit_list");
ret = dict_get_int32 (dict, "type", &type);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
+ gf_log (frame->this->name, GF_LOG_TRACE,
"failed to get type");
if (type == GF_QUOTA_OPTION_TYPE_LIST) {
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_quota_limit_list
(volname, limit_list, rsp.op_ret,
@@ -2174,9 +2567,9 @@ gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
-#endif
+
if (limit_list) {
- gf_cli3_1_print_limit_list (volname,
+ gf_cli_print_limit_list (volname,
limit_list,
rsp.op_errstr);
} else {
@@ -2194,7 +2587,6 @@ gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
snprintf (msg, sizeof (msg), "successful");
}
-#if (HAVE_LIB_XML)
xml_output:
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volQuota", msg, rsp.op_ret,
@@ -2204,7 +2596,6 @@ xml_output:
"Error outputting to xml");
goto out;
}
-#endif
if (strlen (msg) > 0) {
if (rsp.op_ret)
@@ -2216,28 +2607,39 @@ xml_output:
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
+ if (dict)
+ dict_unref (dict);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ free (rsp.dict.dict_val);
return ret;
}
int
-gf_cli3_1_getspec_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_getspec_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_getspec_rsp rsp = {0,};
int ret = -1;
char *spec = NULL;
+ call_frame_t *frame = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "getspec failed");
goto out;
}
@@ -2261,20 +2663,30 @@ out:
}
int
-gf_cli3_1_pmap_b2p_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_pmap_b2p_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
pmap_port_by_brick_rsp rsp = {0,};
int ret = -1;
char *spec = NULL;
+ call_frame_t *frame = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ /* name the operation this callback handles (pmap_b2p) */
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "pmap_b2p failed");
goto out;
}
@@ -2292,13 +2704,12 @@ out:
int32_t
-gf_cli3_1_probe (call_frame_t *frame, xlator_t *this,
+gf_cli_probe (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_probe_req req = {0,};
+ gf_cli_req req = {{0,},};
int ret = 0;
dict_t *dict = NULL;
- char *hostname = NULL;
int port = 0;
if (!frame || !this || !data) {
@@ -2307,35 +2718,32 @@ gf_cli3_1_probe (call_frame_t *frame, xlator_t *this,
}
dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
ret = dict_get_int32 (dict, "port", &port);
- if (ret)
- port = CLI_GLUSTERD_PORT;
-
- req.hostname = hostname;
- req.port = port;
+ if (ret) {
+ ret = dict_set_int32 (dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROBE, NULL,
- this, gf_cli3_1_probe_cbk,
- (xdrproc_t)xdr_gf1_cli_probe_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_probe_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROBE, this, cli_rpc_prog, NULL);
out:
+ GF_FREE (req.dict.dict_val);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
return ret;
}
int32_t
-gf_cli3_1_deprobe (call_frame_t *frame, xlator_t *this,
+gf_cli_deprobe (call_frame_t *frame, xlator_t *this,
void *data)
{
- gf1_cli_deprobe_req req = {0,};
+ gf_cli_req req = {{0,},};
int ret = 0;
dict_t *dict = NULL;
- char *hostname = NULL;
int port = 0;
int flags = 0;
@@ -2345,57 +2753,70 @@ gf_cli3_1_deprobe (call_frame_t *frame, xlator_t *this,
}
dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
-
ret = dict_get_int32 (dict, "port", &port);
- if (ret)
- port = CLI_GLUSTERD_PORT;
+ if (ret) {
+ ret = dict_set_int32 (dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
ret = dict_get_int32 (dict, "flags", &flags);
- if (ret)
- flags = 0;
+ if (ret) {
+ ret = dict_set_int32 (dict, "flags", 0);
+ if (ret)
+ goto out;
+ }
- req.hostname = hostname;
- req.port = port;
- req.flags = flags;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEPROBE, NULL,
- this, gf_cli3_1_deprobe_cbk,
- (xdrproc_t)xdr_gf1_cli_deprobe_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_deprobe_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEPROBE, this, cli_rpc_prog, NULL);
out:
+ GF_FREE (req.dict.dict_val);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
return ret;
}
+/*
+ * Submit a GLUSTER_CLI_LIST_FRIENDS request.
+ *
+ * 'data' carries the list flags as an integer cast to a pointer; the
+ * value is copied into the request and parked in frame->local for the
+ * callback. Returns -1 when 'frame' or 'this' is NULL, otherwise the
+ * result of cli_cmd_submit().
+ */
int32_t
-gf_cli3_1_list_friends (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_list_friends (call_frame_t *frame, xlator_t *this,
+ void *data)
{
gf1_cli_peer_list_req req = {0,};
int ret = 0;
+ unsigned long flags = 0;
if (!frame || !this) {
ret = -1;
goto out;
}
- req.flags = GF_CLI_LIST_ALL;
+ GF_ASSERT (frame->local == NULL);
+ flags = (long)data;
+ req.flags = flags;
+ frame->local = (void*)flags;
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_LIST_FRIENDS, NULL,
- this, gf_cli3_1_list_friends_cbk,
+ this, gf_cli_list_friends_cbk,
(xdrproc_t) xdr_gf1_cli_peer_list_req);
out:
+ /* 'frame' may be NULL here: the argument check above jumps to
+ * 'out' with ret == -1, so guard the dereference. */
+ if (ret && frame) {
+ /*
+ * If everything goes fine, gf_cli_list_friends_cbk()
+ * [invoked through cli_cmd_submit()] resets the
+ * frame->local to NULL. In case cli_cmd_submit()
+ * fails in between, RESET frame->local here.
+ */
+ frame->local = NULL;
+ }
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_get_next_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
@@ -2411,7 +2832,6 @@ gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
ctx = data;
local = frame->local;
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_info_begin (local, 0, 0, "");
if (ret) {
@@ -2419,17 +2839,15 @@ gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
goto out;
}
}
-#endif
- ret = gf_cli3_1_get_volume (frame, this, data);
+ ret = gf_cli_get_volume (frame, this, data);
if (!local || !local->get_vol.volname) {
-#if (HAVE_LIB_XML)
if ((global_state->mode & GLUSTER_MODE_XML))
goto end_xml;
-#endif
- cli_out ("No volumes present");
+
+ cli_err ("No volumes present");
goto out;
}
@@ -2437,20 +2855,18 @@ gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
ctx->volname = local->get_vol.volname;
while (ctx->volname) {
- ret = gf_cli3_1_get_volume (frame, this, ctx);
+ ret = gf_cli_get_volume (frame, this, ctx);
if (ret)
goto out;
ctx->volname = local->get_vol.volname;
}
-#if (HAVE_LIB_XML)
end_xml:
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_info_end (local);
if (ret)
gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
}
-#endif
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2458,7 +2874,7 @@ out:
}
int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_get_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -2487,90 +2903,108 @@ gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
flags = ctx->flags;
ret = dict_set_int32 (dict, "flags", flags);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set flags");
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to set flags");
goto out;
}
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ /* the result used to be overwritten by cli_cmd_submit() below;
+ * do not submit a request whose dict failed to serialize */
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "failed to serialize dict");
+ goto out;
+ }
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_GET_VOLUME, NULL,
- this, gf_cli3_1_get_volume_cbk,
+ this, gf_cli_get_volume_cbk,
(xdrproc_t) xdr_gf_cli_req);
out:
if (dict)
dict_unref (dict);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
-
+/*
+ * Send a GLUSTER_CLI_UUID_GET request built from the caller's dict
+ * ('data') through cli_to_glusterd().
+ *
+ * Returns -1 when 'frame', 'this' or 'data' is NULL, otherwise the
+ * result of cli_to_glusterd().
+ */
int32_t
-gf_cli3_1_create_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli3_1_uuid_get (call_frame_t *frame, xlator_t *this,
+ void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
- cli_local_t *local = NULL;
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
+ if (!frame || !this || !data) {
ret = -1;
goto out;
}
- dict = dict_ref ((dict_t *)data);
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli3_1_uuid_get_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_GET, this, cli_rpc_prog,
+ NULL);
+out:
+ /* Release the request buffer like the sibling senders (probe,
+ * create/delete volume) do after cli_to_glusterd(); req is
+ * zero-initialised, so this is safe on the early-exit path.
+ * NOTE(review): assumes cli_to_glusterd() leaves the serialized
+ * dict in req.dict.dict_val -- confirm. */
+ GF_FREE (req.dict.dict_val);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * Send a GLUSTER_CLI_UUID_RESET request built from the caller's dict
+ * ('data') through cli_to_glusterd().
+ *
+ * Returns -1 when 'frame', 'this' or 'data' is NULL, otherwise the
+ * result of cli_to_glusterd().
+ */
+int32_t
+gf_cli3_1_uuid_reset (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
+ if (!frame || !this || !data) {
+ ret = -1;
goto out;
}
- local = cli_local_get ();
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli3_1_uuid_reset_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_RESET, this, cli_rpc_prog,
+ NULL);
+out:
+ /* Release the request buffer like the sibling senders (probe,
+ * create/delete volume) do after cli_to_glusterd(); req is
+ * zero-initialised, so this is safe on the early-exit path.
+ * NOTE(review): assumes cli_to_glusterd() leaves the serialized
+ * dict in req.dict.dict_val -- confirm. */
+ GF_FREE (req.dict.dict_val);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_cli_create_volume (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_CREATE_VOLUME, NULL,
- this, gf_cli3_1_create_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
-
+ dict = data;
+ ret = cli_to_glusterd (&req, frame, gf_cli_create_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_CREATE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (dict)
- dict_unref (dict);
-
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_delete_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_delete_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
dict_t *dict = NULL;
if (!frame || !this || !data) {
@@ -2578,50 +3012,26 @@ gf_cli3_1_delete_volume (call_frame_t *frame, xlator_t *this,
goto out;
}
- local = cli_local_get ();
-
- dict = dict_new ();
- ret = dict_set_str (dict, "volname", data);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "dict set failed");
- goto out;
- }
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+ dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialize dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DELETE_VOLUME, NULL,
- this, gf_cli3_1_delete_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_delete_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DELETE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
- if (dict)
- dict_unref (dict);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_start_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_start_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
dict_t *dict = NULL;
if (!frame || !this || !data) {
@@ -2630,27 +3040,11 @@ gf_cli3_1_start_volume (call_frame_t *frame, xlator_t *this,
}
dict = data;
- local = cli_local_get ();
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize dict");
- goto out;
- }
-
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_START_VOLUME, NULL,
- this, gf_cli3_1_start_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_start_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_START_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2659,12 +3053,11 @@ out:
}
int32_t
-gf_cli3_1_stop_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_stop_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
dict_t *dict = data;
if (!frame || !this || !data) {
@@ -2672,28 +3065,12 @@ gf_cli3_1_stop_volume (call_frame_t *frame, xlator_t *this,
goto out;
}
- local = cli_local_get ();
dict = data;
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STOP_VOLUME, NULL,
- this, gf_cli3_1_stop_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_stop_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STOP_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2702,17 +3079,12 @@ out:
}
int32_t
-gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_defrag_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
- char *volname = NULL;
- char *cmd_str = NULL;
dict_t *dict = NULL;
- gf_cli_defrag_type cmd = 0;
- dict_t *req_dict = NULL;
if (!frame || !this || !data) {
ret = -1;
@@ -2721,81 +3093,10 @@ gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- gf_log ("", GF_LOG_DEBUG, "error");
-
- ret = dict_get_str (dict, "command", &cmd_str);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "error");
- goto out;
- }
-
- if (strcmp (cmd_str, "start") == 0) {
- cmd = GF_DEFRAG_CMD_START;
- ret = dict_get_str (dict, "option", &cmd_str);
- if (!ret) {
- if (strcmp (cmd_str, "force") == 0) {
- cmd = GF_DEFRAG_CMD_START_FORCE;
- }
- }
- goto done;
- }
-
- if (strcmp (cmd_str, "fix-layout") == 0) {
- cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
- goto done;
- }
- if (strcmp (cmd_str, "stop") == 0) {
- cmd = GF_DEFRAG_CMD_STOP;
- goto done;
- }
- if (strcmp (cmd_str, "status") == 0) {
- cmd = GF_DEFRAG_CMD_STATUS;
- }
-
-done:
- local = cli_local_get ();
-
- req_dict = dict_new ();
- if (!req_dict) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str (req_dict, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
-
- ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
-
- if (local) {
- local->dict = dict_ref (req_dict);
- frame->local = local;
- }
-
- ret = dict_allocate_and_serialize (req_dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_1_defrag_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_defrag_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2804,7 +3105,7 @@ out:
}
int32_t
-gf_cli3_1_rename_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_rename_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -2818,9 +3119,8 @@ gf_cli3_1_rename_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
+ ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
+ &req.dict.dict_len);
if (ret < 0) {
gf_log (this->name, GF_LOG_ERROR,
"failed to serialize the data");
@@ -2831,7 +3131,7 @@ gf_cli3_1_rename_volume (call_frame_t *frame, xlator_t *this,
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_RENAME_VOLUME, NULL,
- this, gf_cli3_1_rename_volume_cbk,
+ this, gf_cli_rename_volume_cbk,
(xdrproc_t) xdr_gf_cli_req);
out:
@@ -2841,7 +3141,7 @@ out:
}
int32_t
-gf_cli3_1_reset_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_reset_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -2855,28 +3155,18 @@ gf_cli3_1_reset_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_RESET_VOLUME, NULL,
- this, gf_cli3_1_reset_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_reset_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_RESET_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_set_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_set_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -2890,19 +3180,10 @@ gf_cli3_1_set_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_SET_VOLUME, NULL,
- this, gf_cli3_1_set_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_set_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SET_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -2911,7 +3192,7 @@ out:
}
int32_t
-gf_cli3_1_add_brick (call_frame_t *frame, xlator_t *this,
+gf_cli_add_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -2936,33 +3217,20 @@ gf_cli3_1_add_brick (call_frame_t *frame, xlator_t *this,
if (ret)
goto out;
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_ADD_BRICK, NULL,
- this, gf_cli3_1_add_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_add_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_ADD_BRICK, this, cli_rpc_prog, NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
+gf_cli_remove_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};;
@@ -2973,27 +3241,14 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
char *volname = NULL;
dict_t *req_dict = NULL;
int32_t cmd = 0;
- cli_local_t *local = NULL;
if (!frame || !this || !data) {
ret = -1;
goto out;
}
- local = cli_local_get ();
- if (!local) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
-
- frame->local = local;
-
dict = data;
- local->dict = dict_ref (dict);
-
ret = dict_get_str (dict, "volname", &volname);
if (ret)
goto out;
@@ -3005,21 +3260,13 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
if ((command != GF_OP_CMD_STATUS) &&
(command != GF_OP_CMD_STOP)) {
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REMOVE_BRICK, NULL,
- this, gf_cli3_1_remove_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_remove_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REMOVE_BRICK, this,
+ cli_rpc_prog, NULL);
} else {
- /* Need rebalance status to e sent :-) */
+ /* Need rebalance status to be sent :-) */
req_dict = dict_new ();
if (!req_dict) {
ret = -1;
@@ -3028,7 +3275,7 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
ret = dict_set_str (req_dict, "volname", volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict");
goto out;
}
@@ -3040,51 +3287,37 @@ gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"Failed to set dict");
goto out;
}
- ret = dict_allocate_and_serialize (req_dict,
- &status_req.dict.dict_val,
- (size_t *) &status_req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ ret = cli_to_glusterd (&status_req, frame,
+ gf_cli3_remove_brick_status_cbk,
+ (xdrproc_t) xdr_gf_cli_req, req_dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this,
+ cli_rpc_prog, NULL);
- goto out;
}
- ret = cli_cmd_submit (&status_req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_remove_brick_status_cbk,
- (xdrproc_t) xdr_gf_cli_req);
-
- }
-
out:
+ if (req_dict)
+ dict_unref (req_dict);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
-
- if (status_req.dict.dict_val)
- GF_FREE (status_req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
- if (req_dict)
- dict_unref (req_dict);
+ GF_FREE (status_req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
+gf_cli_replace_brick (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
dict_t *dict = NULL;
char *src_brick = NULL;
char *dst_brick = NULL;
@@ -3098,17 +3331,6 @@ gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
dict = data;
- local = cli_local_get ();
- if (!local) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
-
- local->dict = dict_ref (dict);
- frame->local = local;
-
ret = dict_get_int32 (dict, "operation", &op);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -3141,34 +3363,22 @@ gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
"%s with operation=%d", src_brick,
dst_brick, op);
-
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REPLACE_BRICK, NULL,
- this, gf_cli3_1_replace_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_replace_brick_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REPLACE_BRICK, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_log_rotate (call_frame_t *frame, xlator_t *this,
+gf_cli_log_rotate (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -3182,31 +3392,20 @@ gf_cli3_1_log_rotate (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
-
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to serialize dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LOG_ROTATE, NULL,
- this, gf_cli3_1_log_rotate_cbk,
- (xdrproc_t) xdr_gf_cli_req);
-
+ ret = cli_to_glusterd (&req, frame, gf_cli_log_rotate_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_LOG_ROTATE, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_sync_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_sync_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
int ret = 0;
@@ -3219,35 +3418,27 @@ gf_cli3_1_sync_volume (call_frame_t *frame, xlator_t *this,
}
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to serialize dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame,
- cli_rpc_prog, GLUSTER_CLI_SYNC_VOLUME,
- NULL, this, gf_cli3_1_sync_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_sync_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYNC_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_getspec (call_frame_t *frame, xlator_t *this,
+gf_cli_getspec (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_getspec_req req = {0,};
int ret = 0;
dict_t *dict = NULL;
+ dict_t *op_dict = NULL;
if (!frame || !this || !data) {
ret = -1;
@@ -3260,19 +3451,52 @@ gf_cli3_1_getspec (call_frame_t *frame, xlator_t *this,
if (ret)
goto out;
+ op_dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32 (op_dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (op_dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (op_dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
+ goto out;
+ }
+
ret = cli_cmd_submit (&req, frame, &cli_handshake_prog,
GF_HNDSK_GETSPEC, NULL,
- this, gf_cli3_1_getspec_cbk,
+ this, gf_cli_getspec_cbk,
(xdrproc_t) xdr_gf_getspec_req);
out:
+ if (op_dict) {
+ dict_unref(op_dict);
+ }
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int32_t
-gf_cli3_1_quota (call_frame_t *frame, xlator_t *this,
+gf_cli_quota (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -3286,27 +3510,18 @@ gf_cli3_1_quota (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_QUOTA, NULL,
- this, gf_cli3_1_quota_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_quota_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_QUOTA, this, cli_rpc_prog, NULL);
- GF_FREE (req.dict.dict_val);
out:
+ GF_FREE (req.dict.dict_val);
+
return ret;
}
int32_t
-gf_cli3_1_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
{
pmap_port_by_brick_req req = {0,};
int ret = 0;
@@ -3325,7 +3540,7 @@ gf_cli3_1_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, &cli_pmap_prog,
GF_PMAP_PORTBYBRICK, NULL,
- this, gf_cli3_1_pmap_b2p_cbk,
+ this, gf_cli_pmap_b2p_cbk,
(xdrproc_t) xdr_pmap_port_by_brick_req);
out:
@@ -3335,7 +3550,7 @@ out:
}
static int
-gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_fsm_log_rsp rsp = {0,};
@@ -3355,7 +3570,8 @@ gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -3386,7 +3602,7 @@ gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
if (tr_count)
cli_out("number of transitions: %d", tr_count);
else
- cli_out("No transitions");
+ cli_err("No transitions");
for (i = 0; i < tr_count; i++) {
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "log%d-old-state", i);
@@ -3425,7 +3641,7 @@ out:
}
int32_t
-gf_cli3_1_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
gf1_cli_fsm_log_req req = {0,};
@@ -3439,7 +3655,7 @@ gf_cli3_1_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
req.name = data;
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_FSM_LOG, NULL,
- this, gf_cli3_1_fsm_log_cbk,
+ this, gf_cli_fsm_log_cbk,
(xdrproc_t) xdr_gf1_cli_fsm_log_req);
out:
@@ -3449,14 +3665,17 @@ out:
}
int
-gf_cli3_1_gsync_config_command (dict_t *dict)
+gf_cli_gsync_config_command (dict_t *dict)
{
runner_t runner = {0,};
char *subop = NULL;
char *gwd = NULL;
char *slave = NULL;
+ char *confpath = NULL;
char *master = NULL;
char *op_name = NULL;
+ int ret = -1;
+ char conf_path[PATH_MAX] = "";
if (dict_get_str (dict, "subop", &subop) != 0)
return -1;
@@ -3475,9 +3694,17 @@ gf_cli3_1_gsync_config_command (dict_t *dict)
if (dict_get_str (dict, "op_name", &op_name) != 0)
op_name = NULL;
+ ret = dict_get_str (dict, "conf_path", &confpath);
+ if (!confpath) {
+ ret = snprintf (conf_path, sizeof(conf_path) - 1,
+ "%s/"GEOREP"/gsyncd_template.conf", gwd);
+ conf_path[ret] = '\0';
+ confpath = conf_path;
+ }
+
runinit (&runner);
runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gwd);
+ runner_argprintf (&runner, "%s", confpath);
if (master)
runner_argprintf (&runner, ":%s", master);
runner_add_arg (&runner, slave);
@@ -3489,64 +3716,643 @@ gf_cli3_1_gsync_config_command (dict_t *dict)
}
int
-gf_cli3_1_gsync_out_status (dict_t *dict)
+gf_cli_fetch_gsyncd_status_values (char *status,
+ gf_cli_gsync_status_t *sts_val)
{
- int gsync_count = 0;
- int i = 0;
- int ret = 0;
- char mst[PATH_MAX] = {0, };
- char slv[PATH_MAX]= {0, };
- char sts[PATH_MAX] = {0, };
- char hyphens[81] = {0, };
- char *mst_val = NULL;
- char *slv_val = NULL;
- char *sts_val = NULL;
-
- cli_out ("%-20s %-50s %-10s", "MASTER", "SLAVE", "STATUS");
-
- for (i=0; i<sizeof(hyphens)-1; i++)
- hyphens[i] = '-';
+ int32_t ret = -1;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *key = NULL;
+ char *value = NULL;
- cli_out ("%s", hyphens);
+ if (!status || !sts_val) {
+ gf_log ("", GF_LOG_ERROR, "status or sts_val is null");
+ goto out;
+ }
+ tmp = strtok_r (status, "\n", &save_ptr);
- ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (tmp)
+ sts_val->health = gf_strdup (tmp);
+
+ while (tmp) {
+ key = strtok_r (tmp, "=", &value);
+
+ if ((key) && (!strcmp(key, "Uptime")))
+ sts_val->uptime = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "FilesSyncd")))
+ sts_val->files_syncd = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "FilesPending")))
+ sts_val->files_pending = gf_strdup (value);
+
+ if ((key) && (!strcmp(key, "BytesPending"))) {
+ value = gf_uint64_2human_readable(atol(value));
+ sts_val->bytes_pending = gf_strdup (value);
+ }
+
+ if ((key) && (!strcmp(key, "DeletesPending")))
+ sts_val->deletes_pending = gf_strdup (value);
+
+ tmp = strtok_r (NULL, ";", &save_ptr);
+ }
+
+ if (sts_val->health)
+ ret = 0;
+
+ if (!sts_val->uptime)
+ sts_val->uptime = gf_strdup ("N/A");
+
+ if (!sts_val->files_syncd)
+ sts_val->files_syncd = gf_strdup ("N/A");
+
+ if (!sts_val->files_pending)
+ sts_val->files_pending = gf_strdup ("N/A");
+
+ if (!sts_val->bytes_pending)
+ sts_val->bytes_pending = gf_strdup ("N/A");
+
+ if (!sts_val->deletes_pending)
+ sts_val->deletes_pending = gf_strdup ("N/A");
+
+out:
+ gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret);
+ return ret;
+}
+
+char*
+get_struct_variable (int mem_num, gf_cli_gsync_status_t *sts_val)
+{
+ switch (mem_num) {
+ case 0: return (sts_val->node);
+ case 1: return (sts_val->master);
+ case 2: return (sts_val->slave);
+ case 3: return (sts_val->health);
+ case 4: return (sts_val->uptime);
+ case 5: return (sts_val->files_syncd);
+ case 6: return (sts_val->files_pending);
+ case 7: return (sts_val->bytes_pending);
+ case 8: return (sts_val->deletes_pending);
+ default:
+ goto out;
+ }
+
+out:
+ return NULL;
+}
+
+int
+gf_cli_print_status (char **title_values,
+ gf_cli_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count,
+ int number_of_fields, int is_detail)
+{
+ int indents = 0;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int total_spacing = 0;
+ char **output_values = NULL;
+ char *tmp = NULL;
+ char *hyphens = NULL;
+ char heading[PATH_MAX] = {0, };
+ char indent_spaces[PATH_MAX] = {0, };
+
+ /* calculating spacing for hyphens */
+ for (i = 0; i < number_of_fields; i++) {
+ /* Suppressing master and slave output for status detail */
+ if ((is_detail) && ((i == 1) || (i == 2))) {
+ total_spacing++;
+ continue;
+ } else if ((!is_detail) && (i > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ continue;
+ }
+ spacing[i] += 3; /* Adding extra space to
+ distinguish between fields */
+ total_spacing += spacing[i];
+ }
+ total_spacing += 4; /* For the spacing between the fields */
+
+ /* char pointers for each field */
+ output_values = GF_CALLOC (number_of_fields, sizeof (char *),
+ gf_common_mt_char);
+ if (!output_values) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < number_of_fields; i++) {
+ output_values[i] = GF_CALLOC (spacing[i] + 1, sizeof (char),
+ gf_common_mt_char);
+ if (!output_values[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ hyphens = GF_CALLOC (total_spacing + 1, sizeof (char),
+ gf_common_mt_char);
+ if (!hyphens) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(heading, sizeof(heading), "MASTER: %s SLAVE: %s",
+ sts_vals[0]->master, sts_vals[0]->slave);
if (ret) {
- gf_log ("cli", GF_LOG_INFO, "No active geo-replication sessions"
- "present for the selected");
+ if (ret < sizeof(heading))
+ heading[ret] = '\0';
+ else
+ heading[sizeof(heading) - 1] = '\0';
ret = 0;
+ } else {
+ ret = -1;
goto out;
}
- for (i = 1; i <= gsync_count; i++) {
- snprintf (mst, sizeof(mst), "master%d", i);
- snprintf (slv, sizeof(slv), "slave%d", i);
- snprintf (sts, sizeof(sts), "status%d", i);
+ if (is_detail) {
+ cli_out (" ");
+ if (strlen(heading) > total_spacing)
+ cli_out ("%s", heading);
+ else {
+ /* Printing the heading with centre justification */
+ indents = (total_spacing - strlen(heading)) / 2;
+ memset (indent_spaces, ' ', indents);
+ indent_spaces[indents] = '\0';
+ ret = snprintf (hyphens, total_spacing, "%s%s",
+ indent_spaces, heading);
+ if (ret) {
+ hyphens[ret] = '\0';
+ cli_out ("%s", hyphens);
+ ret = 0;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+ cli_out (" ");
+ }
+
+ /* setting the title "NODE", "MASTER", etc. from title_values[]
+ and printing the same */
+ for (j = 0; j < number_of_fields; j++) {
+ /* Suppressing master and slave output for status detail */
+ if ((is_detail) && ((j == 1) || (j == 2))) {
+ output_values[j][0] = '\0';
+ continue;
+ } else if ((!is_detail) && (j > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ memset (output_values[j], ' ', spacing[j]);
+ memcpy (output_values[j], title_values[j],
+ strlen(title_values[j]));
+ output_values[j][spacing[j]] = '\0';
+ }
+ cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8]);
+
+ /* setting and printing the hyphens */
+ memset (hyphens, '-', total_spacing);
+ hyphens[total_spacing] = '\0';
+ cli_out ("%s", hyphens);
+
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 0; j < number_of_fields; j++) {
+ /* Suppressing master and slave output for
+ * status detail */
+ if ((is_detail) && ((j == 1) || (j == 2))) {
+ output_values[j][0] = '\0';
+ continue;
+ } else if ((!is_detail) && (j > 4)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log ("", GF_LOG_ERROR,
+ "struct member empty.");
+ ret = -1;
+ goto out;
+ }
+ memset (output_values[j], ' ', spacing[j]);
+ memcpy (output_values[j], tmp, strlen (tmp));
+ output_values[j][spacing[j]] = '\0';
+ }
+
+ cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8]);
+ }
+
+out:
+ if (output_values) {
+ for (i = 0; i < number_of_fields; i++) {
+ if (output_values[i])
+ GF_FREE (output_values[i]);
+ }
+ GF_FREE (output_values);
+ }
+
+ if (hyphens)
+ GF_FREE (hyphens);
- ret = dict_get_str (dict, mst, &mst_val);
+ return ret;
+}
+
+int
+gf_cli_read_status_data (dict_t *dict,
+ gf_cli_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count,
+ int number_of_fields)
+{
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ char mst[PATH_MAX] = {0, };
+ char slv[PATH_MAX] = {0, };
+ char sts[PATH_MAX] = {0, };
+ char nds[PATH_MAX] = {0, };
+ char *status = NULL;
+ char *tmp = NULL;
+
+ /* Storing per node status info in each object */
+ for (i = 0; i < gsync_count; i++) {
+ snprintf (nds, sizeof(nds), "node%d", i + 1);
+ snprintf (mst, sizeof(mst), "master%d", i + 1);
+ snprintf (slv, sizeof(slv), "slave%d", i + 1);
+ snprintf (sts, sizeof(sts), "status%d", i + 1);
+
+ /* Fetching the values from dict, and calculating
+ the max length for each field */
+ ret = dict_get_str (dict, nds, &(sts_vals[i]->node));
if (ret)
goto out;
- ret = dict_get_str (dict, slv, &slv_val);
+ ret = dict_get_str (dict, mst, &(sts_vals[i]->master));
if (ret)
goto out;
- ret = dict_get_str (dict, sts, &sts_val);
+ ret = dict_get_str (dict, slv, &(sts_vals[i]->slave));
if (ret)
goto out;
- cli_out ("%-20s %-50s %-10s", mst_val,
- slv_val, sts_val);
+ ret = dict_get_str (dict, sts, &status);
+ if (ret)
+ goto out;
+ /* Fetching health and uptime from sts_val */
+ ret = gf_cli_fetch_gsyncd_status_values (status, sts_vals[i]);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < number_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log ("", GF_LOG_ERROR,
+ "struct member empty.");
+ ret = -1;
+ goto out;
+ }
+ if (strlen (tmp) > spacing[j])
+ spacing[j] = strlen (tmp);
+ }
}
- out:
+out:
return ret;
+}
+
+int
+gf_cli_gsync_status_output (dict_t *dict, int status_detail)
+{
+ int gsync_count = 0;
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int spacing[10] = {0};
+ int num_of_fields = 9;
+ char errmsg[1024] = "";
+ char *master = NULL;
+ char *slave = NULL;
+ char *tmp = NULL;
+ char *title_values[] = {"NODE", "MASTER", "SLAVE",
+ "HEALTH", "UPTIME",
+ "FILES SYNCD",
+ "FILES PENDING",
+ "BYTES PENDING",
+ "DELETES PENDING"};
+ gf_cli_gsync_status_t **sts_vals = NULL;
+
+ /* Checks if any session is active or not */
+ ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ if (ret) {
+ ret = dict_get_str (dict, "master", &master);
+
+ ret = dict_get_str (dict, "slave", &slave);
+
+ if (master) {
+ if (slave)
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions between %s"
+ " and %s", master, slave);
+ else
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions for %s",
+ master);
+ } else
+ snprintf (errmsg, sizeof(errmsg), "No active "
+ "geo-replication sessions");
+
+ gf_log ("cli", GF_LOG_INFO, "%s", errmsg);
+ cli_out ("%s", errmsg);
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; i < num_of_fields; i++)
+ spacing[i] = strlen(title_values[i]);
+
+ /* gsync_count = number of nodes reporting output.
+ each sts_val object will store output of each
+ node */
+ sts_vals = GF_CALLOC (gsync_count, sizeof (gf_cli_gsync_status_t *),
+ gf_common_mt_char);
+ if (!sts_vals) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < gsync_count; i++) {
+ sts_vals[i] = GF_CALLOC (1, sizeof (gf_cli_gsync_status_t),
+ gf_common_mt_char);
+ if (!sts_vals[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = gf_cli_read_status_data (dict, sts_vals, spacing,
+ gsync_count, num_of_fields);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to read status data");
+ goto out;
+ }
+
+ ret = gf_cli_print_status (title_values, sts_vals, spacing, gsync_count,
+ num_of_fields, status_detail);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Unable to print status output");
+ goto out;
+ }
+
+out:
+ if (sts_vals) {
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 3; j < num_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (tmp)
+ GF_FREE (tmp);
+ }
+ }
+ GF_FREE (sts_vals);
+ }
+
+ return ret;
+}
+
+static int32_t
+write_contents_to_common_pem_file (dict_t *dict, int output_count)
+{
+ char *workdir = NULL;
+ char common_pem_file[PATH_MAX] = "";
+ char *output = NULL;
+ char output_name[PATH_MAX] = "";
+ int bytes_writen = 0;
+ int fd = -1;
+ int ret = -1;
+ int i = -1;
+
+ ret = dict_get_str (dict, "glusterd_workdir", &workdir);
+ if (ret || !workdir) {
+ gf_log ("", GF_LOG_ERROR, "Unable to fetch workdir");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf (common_pem_file, sizeof(common_pem_file),
+ "%s/geo-replication/common_secret.pem.pub",
+ workdir);
+
+ unlink (common_pem_file);
+
+ fd = open (common_pem_file, O_WRONLY | O_CREAT, 0600);
+ if (fd == -1) {
+ gf_log ("", GF_LOG_ERROR, "Failed to open %s"
+ " Error : %s", common_pem_file,
+ strerror (errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", i);
+ ret = dict_get_str (dict, output_name, &output);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get %s.",
+ output_name);
+ cli_out ("Unable to fetch output.");
+ }
+ if (output) {
+ bytes_writen = write (fd, output, strlen(output));
+ if (bytes_writen != strlen(output)) {
+ gf_log ("", GF_LOG_ERROR, "Failed to write "
+ "to %s", common_pem_file);
+ ret = -1;
+ goto out;
+ }
+ /* Adding the new line character */
+ bytes_writen = write (fd, "\n", strlen("\n"));
+ if (bytes_writen != strlen("\n")) {
+ gf_log ("", GF_LOG_ERROR,
+ "Failed to add new line char");
+ ret = -1;
+ goto out;
+ }
+ output = NULL;
+ }
+ }
+
+ cli_out ("Common secret pub file present at %s", common_pem_file);
+ ret = 0;
+out:
+ if (fd)
+ close (fd);
+
+ gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_sys_exec_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int output_count = -1;
+ int i = -1;
+ char *output = NULL;
+ char *command = NULL;
+ char output_name[PATH_MAX] = "";
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
+ "Command failed.");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, "output_count", &output_count);
+ if (ret) {
+ cli_out ("Command executed successfully.");
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "command", &command);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ if (!strcmp (command, "gsec_create")) {
+ ret = write_contents_to_common_pem_file (dict, output_count);
+ if (!ret)
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ memset (output_name, '\0', sizeof (output_name));
+ snprintf (output_name, sizeof (output_name),
+ "output_%d", i);
+ ret = dict_get_str (dict, output_name, &output);
+ if (ret) {
+ gf_log ("", GF_LOG_ERROR, "Failed to get %s.",
+ output_name);
+ cli_out ("Unable to fetch output.");
+ }
+ if (output) {
+ cli_out ("%s", output);
+ output = NULL;
+ }
+ }
+
+ ret = 0;
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_copy_file_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
+ "Copy unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ cli_out ("Successfully copied file.");
+
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+
+ free (rsp.dict.dict_val);
+
+ return ret;
+}
+
+int
+gf_cli_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
int ret = -1;
@@ -3555,17 +4361,22 @@ gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
char *gsync_status = NULL;
char *master = NULL;
char *slave = NULL;
- int32_t type = 0;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t status_detail = _gf_false;
+
if (req->rpc_status == -1) {
ret = -1;
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to get response structure");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -3581,16 +4392,14 @@ gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
if (ret)
goto out;
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volGeoRep", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
+ ret = cli_xml_output_vol_gsync (dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
gf_log ("cli", GF_LOG_ERROR,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret) {
cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
@@ -3607,7 +4416,7 @@ gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_get_int32 (dict, "type", &type);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get type");
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get type");
goto out;
}
@@ -3627,29 +4436,50 @@ gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
break;
case GF_GSYNC_OPTION_TYPE_CONFIG:
- ret = gf_cli3_1_gsync_config_command (dict);
+ ret = gf_cli_gsync_config_command (dict);
break;
case GF_GSYNC_OPTION_TYPE_STATUS:
- ret = gf_cli3_1_gsync_out_status (dict);
- goto out;
+ status_detail = dict_get_str_boolean (dict,
+ "status-detail",
+ _gf_false);
+ ret = gf_cli_gsync_status_output (dict, status_detail);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out ("Deleting " GEOREP " session between %s & %s"
+ " has been successful", master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ if (dict_get_str (dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str (dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out ("Creating " GEOREP " session between %s & %s"
+ " has been successful", master, slave);
+ break;
+
default:
cli_out (GEOREP" command executed successfully");
}
out:
-
+ if (dict)
+ dict_unref (dict);
cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ free (rsp.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_gsync_set (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_sys_exec (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = 0;
dict_t *dict = NULL;
@@ -3657,29 +4487,67 @@ gf_cli3_1_gsync_set (call_frame_t *frame, xlator_t *this,
if (!frame || !this || !data) {
ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid data");
goto out;
}
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ ret = cli_to_glusterd (&req, frame, gf_cli_sys_exec_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYS_EXEC, this, cli_rpc_prog,
+ NULL);
+out:
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_copy_file (call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Invalid data");
goto out;
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GSYNC_SET, NULL,
- this, gf_cli3_1_gsync_set_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_copy_file_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_COPY_FILE, this, cli_rpc_prog,
+ NULL);
+out:
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+int32_t
+gf_cli_gsync_set (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{0,}};
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_gsync_set_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_GSYNC_SET, this, cli_rpc_prog,
+ NULL);
out:
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
@@ -3758,7 +4626,7 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", count,
interval, i);
ret = dict_get_double (dict, key, &profile_info[i].max_latency);
- profile_info[i].fop_name = gf_fop_list[i];
+ profile_info[i].fop_name = (char *)gf_fop_list[i];
total_percentage_latency +=
(profile_info[i].fop_hits * profile_info[i].avg_latency);
@@ -3870,7 +4738,7 @@ cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
}
int32_t
-gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -3892,7 +4760,8 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
gf_log ("cli", GF_LOG_DEBUG, "Received resp to profile");
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -3915,7 +4784,6 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
dict->extra_stdfree = rsp.dict.dict_val;
}
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_profile (dict, rsp.op_ret,
rsp.op_errno,
@@ -3925,7 +4793,6 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
ret = dict_get_str (dict, "volname", &volname);
if (ret)
@@ -3974,7 +4841,7 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
if (!brick_count) {
- cli_out ("All bricks of volume %s are down.", volname);
+ cli_err ("All bricks of volume %s are down.", volname);
goto out;
}
@@ -4013,14 +4880,13 @@ gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
out:
if (dict)
dict_unref (dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.op_errstr);
cli_cmd_broadcast_response (ret);
return ret;
}
int32_t
-gf_cli3_1_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
gf_cli_req req = {{0,}};
@@ -4034,46 +4900,33 @@ gf_cli3_1_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
goto out;
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
-
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_profile_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_profile_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
int ret = -1;
- dict_t *dict = NULL;
+ dict_t *dict = NULL;
gf1_cli_stats_op op = GF_CLI_STATS_NONE;
char key[256] = {0};
int i = 0;
int32_t brick_count = 0;
char brick[1024];
int32_t members = 0;
- char *filename;
- char *bricks;
+ char *filename;
+ char *bricks;
uint64_t value = 0;
int32_t j = 0;
gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
@@ -4081,11 +4934,10 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
uint64_t max_nr_open = 0;
double throughput = 0;
double time = 0;
- long int time_sec = 0;
+ int32_t time_sec = 0;
long int time_usec = 0;
- struct tm *tm = NULL;
char timestr[256] = {0, };
- char *openfd_str = NULL;
+ char *openfd_str = NULL;
gf_boolean_t nfs = _gf_false;
gf_boolean_t clear_stats = _gf_false;
int stats_cleared = 0;
@@ -4097,7 +4949,8 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
gf_log ("cli", GF_LOG_DEBUG, "Received resp to top");
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "Unable to decode response");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -4133,7 +4986,6 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_top (dict, rsp.op_ret,
rsp.op_errno,
@@ -4144,7 +4996,6 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
}
goto out;
}
-#endif
ret = dict_get_int32 (dict, "count", &brick_count);
if (ret)
@@ -4258,11 +5109,9 @@ gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
if (ret)
goto out;
- tm = localtime (&time_sec);
- if (!tm)
- goto out;
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
+ gf_time_fmt (timestr, sizeof timestr,
+ time_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS, time_usec);
if (strlen (filename) < VOL_TOP_PERF_FILENAME_DEF_WIDTH)
cli_out ("%*"PRIu64" %-*s %-*s",
@@ -4294,13 +5143,12 @@ out:
if (dict)
dict_unref (dict);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ free (rsp.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_top_volume (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_top_volume (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
gf_cli_req req = {{0,}};
@@ -4314,32 +5162,20 @@ gf_cli3_1_top_volume (call_frame_t *frame, xlator_t *this, void *data)
goto out;
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
-
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_top_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_top_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int
-gf_cli3_1_getwd_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_getwd_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_getwd_rsp rsp = {0,};
@@ -4350,8 +5186,15 @@ gf_cli3_1_getwd_cbk (struct rpc_req *req, struct iovec *iov,
}
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
+ if (ret < 0) {
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ cli_err ("getwd failed");
+ ret = rsp.op_ret;
goto out;
}
@@ -4367,7 +5210,7 @@ out:
}
int32_t
-gf_cli3_1_getwd (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_getwd (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
gf1_cli_getwd_req req = {0,};
@@ -4380,7 +5223,7 @@ gf_cli3_1_getwd (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_GETWD, NULL,
- this, gf_cli3_1_getwd_cbk,
+ this, gf_cli_getwd_cbk,
(xdrproc_t) xdr_gf1_cli_getwd_req);
out:
@@ -5013,7 +5856,7 @@ cli_print_volume_status_fdtable (dict_t *dict, char *prefix)
if (ret)
goto out;
if (0 == openfds) {
- cli_out ("No open fds");
+ cli_err ("No open fds");
goto out;
}
@@ -5367,8 +6210,125 @@ out:
return;
}
+static void
+cli_print_volume_status_tasks (dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int count = 0;
+ int task_count = 0;
+ int status = 0;
+ char *op = NULL;
+ char *task_id_str = NULL;
+ char *volname = NULL;
+ char key[1024] = {0,};
+ char task[1024] = {0,};
+ char *brick = NULL;
+ char *src_brick = NULL;
+ char *dest_brick = NULL;
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "tasks", &task_count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get tasks count");
+ return;
+ }
+
+ cli_out ("Task Status of Volume %s", volname);
+ cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
+
+ if (task_count == 0) {
+ cli_out ("There are no active volume tasks");
+ cli_out (" ");
+ return;
+ }
+
+ for (i = 0; i < task_count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.type", i);
+ ret = dict_get_str(dict, key, &op);
+ if (ret)
+ return;
+ cli_out ("%-20s : %-20s", "Task", op);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", i);
+ ret = dict_get_str (dict, key, &task_id_str);
+ if (ret)
+ return;
+ cli_out ("%-20s : %-20s", "ID", task_id_str);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ return;
+
+ snprintf (task, sizeof (task), "task%d", i);
+
+ /*
+ Replace brick only has two states - In progress and Complete
+ Ref: xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+ */
+
+ if (!strcmp (op, "Replace brick")) {
+ if (status)
+ status = GF_DEFRAG_STATUS_COMPLETE;
+ else
+ status = GF_DEFRAG_STATUS_STARTED;
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.src-brick", task);
+ ret = dict_get_str (dict, key, &src_brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s : %-20s", "Source Brick", src_brick);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.dst-brick", task);
+ ret = dict_get_str (dict, key, &dest_brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s : %-20s", "Destination Brick",
+ dest_brick);
+
+ } else if (!strcmp (op, "Remove brick")) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.count", task);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s", "Removed bricks:");
+
+ for (j = 1; j <= count; j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),"%s.brick%d",
+ task, j);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+
+ cli_out ("%-20s", brick);
+ }
+ }
+ cli_out ("%-20s : %-20s", "Status",
+ cli_vol_task_status_str[status]);
+ cli_out (" ");
+ }
+
+out:
+ return;
+}
+
static int
-gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_status_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
int ret = -1;
@@ -5387,6 +6347,7 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
gf_cli_rsp rsp = {0,};
cli_volume_status_t status = {0};
cli_local_t *local = NULL;
+ gf_boolean_t wipe_local = _gf_false;
char msg[1024] = {0,};
if (req->rpc_status == -1)
@@ -5394,13 +6355,23 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "Volume status response error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received response to status cmd");
local = ((call_frame_t *)myframe)->local;
+ if (!local) {
+ local = cli_local_get ();
+ if (!local) {
+ ret = -1;
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get local");
+ goto out;
+ }
+ wipe_local = _gf_true;
+ }
if (rsp.op_ret) {
if (strcmp (rsp.op_errstr, ""))
@@ -5409,14 +6380,15 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
snprintf (msg, sizeof (msg), "Unable to obtain volume "
"status information.");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- cli_xml_output_str ("volStatus", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- ret = 0;
- goto out;
+ if (!local->all)
+ cli_xml_output_str ("volStatus", msg,
+ rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ ret = 0;
+ goto out;
}
-#endif
+
cli_err ("%s", msg);
if (local && local->all) {
ret = 0;
@@ -5442,8 +6414,9 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
if ((cmd & GF_CLI_STATUS_ALL)) {
- if (local) {
- local->dict = dict;
+ if (local && local->dict) {
+ dict_ref (dict);
+ ret = dict_set_static_ptr (local->dict, "rsp-dict", dict);
ret = 0;
} else {
gf_log ("cli", GF_LOG_ERROR, "local not found");
@@ -5455,34 +6428,44 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD))
notbrick = _gf_true;
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
- if (count == 0) {
- ret = -1;
- goto out;
- }
-
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
- goto out;
-
- index_max = brick_index_max + other_count;
-
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_status (dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_begin (local,
+ rsp.op_ret,
+ rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+ if (cmd & GF_CLI_STATUS_TASKS) {
+ ret = cli_xml_output_vol_status_tasks_detail (local,
+ dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,"Error outputting "
+ "to xml");
+ goto out;
+ }
+ } else {
+ ret = cli_xml_output_vol_status (local, dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_end (local);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ }
}
goto out;
}
-#endif
status.brick = GF_CALLOC (1, PATH_MAX + 256, gf_common_mt_strdup);
@@ -5507,6 +6490,10 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
cli_print_volume_status_callpool (dict, notbrick);
goto cont;
break;
+ case GF_CLI_STATUS_TASKS:
+ cli_print_volume_status_tasks (dict);
+ goto cont;
+ break;
default:
break;
}
@@ -5515,6 +6502,17 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
if (ret)
goto out;
+ ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32 (dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+
cli_out ("Status of volume: %s", volname);
if ((cmd & GF_CLI_STATUS_DETAIL) == 0) {
@@ -5586,19 +6584,26 @@ gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
}
}
cli_out (" ");
+
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)
+ cli_print_volume_status_tasks (dict);
cont:
ret = rsp.op_ret;
out:
- if (status.brick)
- GF_FREE (status.brick);
+ if (dict)
+ dict_unref (dict);
+ GF_FREE (status.brick);
+ if (local && wipe_local) {
+ cli_local_wipe (local);
+ }
cli_cmd_broadcast_response (ret);
return ret;
}
int32_t
-gf_cli3_1_status_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_status_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -5610,20 +6615,10 @@ gf_cli3_1_status_volume (call_frame_t *frame, xlator_t *this,
dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATUS_VOLUME, NULL,
- this, gf_cli3_1_status_cbk,
- (xdrproc_t)xdr_gf_cli_req);
-
+ ret = cli_to_glusterd (&req, frame, gf_cli_status_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STATUS_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning: %d", ret);
return ret;
@@ -5638,46 +6633,55 @@ gf_cli_status_volume_all (call_frame_t *frame, xlator_t *this, void *data)
uint32_t cmd = 0;
char key[1024] = {0};
char *volname = NULL;
- dict_t *vol_dict = NULL;
+ void *vol_dict = NULL;
dict_t *dict = NULL;
cli_local_t *local = NULL;
- dict = (dict_t *)data;
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
+ if (frame->local) {
+ local = frame->local;
+ local->all = _gf_true;
+ } else
goto out;
- local = cli_local_get ();
- if (!local) {
- ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Failed to allocate local");
+ ret = dict_get_uint32 (local->dict, "cmd", &cmd);
+ if (ret)
goto out;
- }
- frame->local = local;
- local->all = _gf_true;
- ret = gf_cli3_1_status_volume (frame, this, data);
+
+ ret = gf_cli_status_volume (frame, this, data);
if (ret)
goto out;
- vol_dict = local->dict;
+ ret = dict_get_ptr (local->dict, "rsp-dict", &vol_dict);
+ if (ret)
+ goto out;
- ret = dict_get_int32 (vol_dict, "vol_count", &vol_count);
+ ret = dict_get_int32 ((dict_t *)vol_dict, "vol_count", &vol_count);
if (ret) {
cli_err ("Failed to get names of volumes");
goto out;
}
- if (vol_count == 0) {
- cli_out ("No volumes present");
- ret = 0;
- goto out;
- }
-
/* remove the "all" flag in cmd */
cmd &= ~GF_CLI_STATUS_ALL;
cmd |= GF_CLI_STATUS_VOL;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ //TODO: Pass proper op_* values
+ ret = cli_xml_output_vol_status_begin (local, 0,0, NULL);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Error outputting to xml");
+ goto out;
+ }
+ }
+
+ if (vol_count == 0 && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_err ("No volumes present");
+ ret = 0;
+ goto out;
+ }
+
for (i = 0; i < vol_count; i++) {
dict = dict_new ();
@@ -5690,7 +6694,7 @@ gf_cli_status_volume_all (call_frame_t *frame, xlator_t *this, void *data)
if (ret)
goto out;
- ret = dict_set_dynstr (dict, "volname", volname);
+ ret = dict_set_str (dict, "volname", volname);
if (ret)
goto out;
@@ -5698,25 +6702,38 @@ gf_cli_status_volume_all (call_frame_t *frame, xlator_t *this, void *data)
if (ret)
goto out;
- ret = gf_cli3_1_status_volume (frame, this, dict);
+ ret = gf_cli_status_volume (frame, this, dict);
if (ret)
goto out;
dict_unref (dict);
}
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_status_end (local);
+ }
+
out:
if (ret)
gf_log ("cli", GF_LOG_ERROR, "status all failed");
+
+ if (vol_dict)
+ dict_unref (vol_dict);
+
if (ret && dict)
dict_unref (dict);
+
+ if (local)
+ cli_local_wipe (local);
+
if (frame)
frame->local = NULL;
+
return ret;
}
static int
-gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_mount_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_mount_rsp rsp = {0,};
@@ -5728,7 +6745,8 @@ gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -5741,7 +6759,7 @@ gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
/* weird sounding but easy to parse... */
cli_err ("%d : failed with this errno (%s)",
rsp.op_errno, strerror (rsp.op_errno));
- ret = 1;
+ ret = -1;
}
out:
@@ -5750,7 +6768,7 @@ out:
}
int32_t
-gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_mount (call_frame_t *frame, xlator_t *this, void *data)
{
gf1_cli_mount_req req = {0,};
int ret = -1;
@@ -5766,7 +6784,7 @@ gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
req.label = label;
ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ &req.dict.dict_len);
if (ret) {
ret = -1;
goto out;
@@ -5774,7 +6792,7 @@ gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_MOUNT, NULL,
- this, gf_cli3_1_mount_cbk,
+ this, gf_cli_mount_cbk,
(xdrproc_t)xdr_gf1_cli_mount_req);
out:
@@ -5783,7 +6801,7 @@ out:
}
static int
-gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_umount_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf1_cli_umount_rsp rsp = {0,};
@@ -5795,7 +6813,8 @@ gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -5805,7 +6824,7 @@ gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
ret = 0;
else {
cli_err ("umount failed");
- ret = 1;
+ ret = -1;
}
out:
@@ -5814,7 +6833,7 @@ out:
}
int32_t
-gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_umount (call_frame_t *frame, xlator_t *this, void *data)
{
gf1_cli_umount_req req = {0,};
int ret = -1;
@@ -5836,7 +6855,7 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_UMOUNT, NULL,
- this, gf_cli3_1_umount_cbk,
+ this, gf_cli_umount_cbk,
(xdrproc_t)xdr_gf1_cli_umount_req);
out:
@@ -5845,62 +6864,211 @@ gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
}
void
-cmd_heal_volume_brick_out (dict_t *dict, int brick)
+cmd_heal_volume_statistics_out (dict_t *dict, int brick)
{
+
uint64_t num_entries = 0;
int ret = 0;
char key[256] = {0};
char *hostname = NULL;
- char *path = NULL;
- char *status = NULL;
+ uint64_t i = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ cli_out ("------------------------------------------------");
+ cli_out ("\nCrawl statistics for brick no %d", brick);
+ cli_out ("Hostname of brick %s", hostname);
+
+ snprintf (key, sizeof key, "statistics-%d-count", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < num_entries; i++)
+ {
+ snprintf (key, sizeof key, "statistics_crawl_type-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &crawl_type);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof key, "statistics_healed_cnt-%d-%"PRIu64,
+ brick,i);
+ ret = dict_get_uint64 (dict, key, &healed_count);
+ if (ret)
+ goto out;
+
+ snprintf (key, sizeof key, "statistics_sb_cnt-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_uint64 (dict, key, &split_brain_count);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_heal_failed_cnt-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_uint64 (dict, key, &heal_failed_count);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_strt_time-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &start_time_str);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_end_time-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_str (dict, key, &end_time_str);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "statistics_inprogress-%d-%"PRIu64,
+ brick, i);
+ ret = dict_get_int32 (dict, key, &progress);
+ if (ret)
+ goto out;
+
+ cli_out ("\nStarting time of crawl: %s", start_time_str);
+ if (progress == 1)
+ cli_out ("Crawl is in progress");
+ else
+ cli_out ("Ending time of crawl: %s", end_time_str);
+
+ cli_out ("Type of crawl: %s", crawl_type);
+ cli_out ("No. of entries healed: %"PRIu64,
+ healed_count);
+ cli_out ("No. of entries in split-brain: %"PRIu64,
+ split_brain_count);
+ cli_out ("No. of heal failed entries: %"PRIu64,
+ heal_failed_count);
+
+ }
+
+
+out:
+ return;
+}
+
+void
+cmd_heal_volume_brick_out (dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
uint64_t i = 0;
uint32_t time = 0;
- char timestr[256];
- struct tm *tm = NULL;
+ char timestr[32] = {0};
+ char *shd_status = NULL;
- snprintf (key, sizeof (key), "%d-hostname", brick);
+ snprintf (key, sizeof key, "%d-hostname", brick);
ret = dict_get_str (dict, key, &hostname);
if (ret)
goto out;
- snprintf (key, sizeof (key), "%d-path", brick);
+ snprintf (key, sizeof key, "%d-path", brick);
ret = dict_get_str (dict, key, &path);
if (ret)
goto out;
cli_out ("\nBrick %s:%s", hostname, path);
- snprintf (key, sizeof (key), "%d-count", brick);
- ret = dict_get_uint64 (dict, key, &num_entries);
- cli_out ("Number of entries: %"PRIu64, num_entries);
- snprintf (key, sizeof (key), "%d-status", brick);
+
+ snprintf (key, sizeof key, "%d-status", brick);
ret = dict_get_str (dict, key, &status);
if (status && strlen (status))
cli_out ("Status: %s", status);
- for (i = 0; i < num_entries; i++) {
- snprintf (key, sizeof (key), "%d-%"PRIu64, brick, i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- continue;
- time = 0;
- snprintf (key, sizeof (key), "%d-%"PRIu64"-time", brick, i);
- ret = dict_get_uint32 (dict, key, &time);
- if (!time) {
- cli_out ("%s", path);
- } else {
- tm = localtime ((time_t*)(&time));
- strftime (timestr, sizeof (timestr),
- "%Y-%m-%d %H:%M:%S", tm);
- if (i ==0) {
+
+ snprintf (key, sizeof key, "%d-shd-status",brick);
+ ret = dict_get_str (dict, key, &shd_status);
+
+ if(!shd_status)
+ {
+ snprintf (key, sizeof key, "%d-count", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ cli_out ("Number of entries: %"PRIu64, num_entries);
+
+
+ for (i = 0; i < num_entries; i++) {
+ snprintf (key, sizeof key, "%d-%"PRIu64, brick, i);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ continue;
+ time = 0;
+ snprintf (key, sizeof key, "%d-%"PRIu64"-time",
+ brick, i);
+ ret = dict_get_uint32 (dict, key, &time);
+ if (!time) {
+ cli_out ("%s", path);
+ } else {
+ gf_time_fmt (timestr, sizeof timestr,
+ time, gf_timefmt_FT);
+ if (i == 0) {
cli_out ("at path on brick");
cli_out ("-----------------------------------");
+ }
+ cli_out ("%s %s", timestr, path);
}
- cli_out ("%s %s", timestr, path);
}
}
+
+out:
+ return;
+}
+
+
+void
+cmd_heal_volume_statistics_heal_count_out (dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ char *shd_status = NULL;
+
+ snprintf (key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str (dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf (key, sizeof key, "%d-path", brick);
+ ret = dict_get_str (dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out ("\nBrick %s:%s", hostname, path);
+
+ snprintf (key, sizeof key, "%d-status", brick);
+ ret = dict_get_str (dict, key, &status);
+ if (status && strlen (status))
+ cli_out ("Status: %s", status);
+
+ snprintf (key, sizeof key, "%d-shd-status",brick);
+ ret = dict_get_str (dict, key, &shd_status);
+
+ if(!shd_status)
+ {
+ snprintf (key, sizeof key, "%d-hardlinks", brick);
+ ret = dict_get_uint64 (dict, key, &num_entries);
+ if (ret)
+ cli_out ("No gathered input for this brick");
+ else
+ cli_out ("Number of entries: %"PRIu64, num_entries);
+
+
+ }
+
out:
return;
}
+
int
-gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -5913,23 +7081,25 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
int brick_count = 0;
int i = 0;
gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ char *operation = NULL;
+ char *substr = NULL;
+ char *heal_op_str = NULL;
if (-1 == req->rpc_status) {
goto out;
}
+ frame = myframe;
+
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
- frame = myframe;
-
- if (frame) {
+ if (frame)
local = frame->local;
- frame->local = NULL;
- }
if (local) {
input_dict = local->dict;
@@ -5950,17 +7120,68 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = dict_get_str (input_dict, "volname", &volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get volname");
goto out;
}
gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Heal operation on volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ switch (heal_op) {
+ case GF_AFR_OP_HEAL_INDEX:
+ heal_op_str = "to perform index self heal";
+ break;
+ case GF_AFR_OP_HEAL_FULL:
+ heal_op_str = "to perform full self heal";
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ heal_op_str = "list of entries to be healed";
+ break;
+ case GF_AFR_OP_HEALED_FILES:
+ heal_op_str = "list of healed entries";
+ break;
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ heal_op_str = "list of heal failed entries";
+ break;
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ heal_op_str = "list of split brain entries";
+ break;
+ case GF_AFR_OP_STATISTICS:
+ heal_op_str = "crawl statistics";
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ heal_op_str = "count of entries to be healed";
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ heal_op_str = "count of entries to be healed per replica";
+ break;
+ case GF_AFR_OP_INVALID:
+ heal_op_str = "invalid heal op";
+ break;
+ }
+
+ if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
+ (heal_op == GF_AFR_OP_HEAL_INDEX)) {
+ operation = "Launching heal operation";
+ substr = "\nUse heal info commands to check status";
+ } else {
+ operation = "Gathering";
+ substr = "";
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp (rsp.op_errstr, "")) {
+ cli_err ("%s", rsp.op_errstr);
+ } else {
+ cli_err ("%s %s on volume %s has been unsuccessful",
+ operation, heal_op_str, volname);
+ }
+
+ ret = rsp.op_ret;
+ goto out;
+ } else {
+ cli_out ("%s %s on volume %s has been successful %s", operation,
+ heal_op_str, volname, substr);
+ }
ret = rsp.op_ret;
if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
@@ -5989,32 +7210,48 @@ gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
if (!brick_count) {
- cli_out ("All bricks of volume %s are down.", volname);
+ cli_err ("All bricks of volume %s are down.", volname);
goto out;
}
- for (i = 0; i < brick_count; i++)
- cmd_heal_volume_brick_out (dict, i);
+ switch (heal_op) {
+ case GF_AFR_OP_STATISTICS:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_out (dict, i);
+ break;
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+ case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_heal_count_out (dict,
+ i);
+ break;
+ case GF_AFR_OP_INDEX_SUMMARY:
+ case GF_AFR_OP_HEALED_FILES:
+ case GF_AFR_OP_HEAL_FAILED_FILES:
+ case GF_AFR_OP_SPLIT_BRAIN_FILES:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_brick_out (dict, i);
+ break;
+ default:
+ break;
+ }
+
ret = rsp.op_ret;
out:
cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ free (rsp.op_errstr);
if (dict)
dict_unref (dict);
return ret;
}
int32_t
-gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_heal_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
int ret = 0;
- cli_local_t *local = NULL;
dict_t *dict = NULL;
if (!frame || !this || !data) {
@@ -6023,39 +7260,22 @@ gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this,
}
dict = data;
- local = cli_local_get ();
-
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_HEAL_VOLUME, NULL,
- this, gf_cli3_1_heal_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_heal_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, dict,
+ GLUSTER_CLI_HEAL_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -6067,7 +7287,8 @@ gf_cli3_1_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received response to statedump");
@@ -6076,7 +7297,6 @@ gf_cli3_1_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
else
snprintf (msg, sizeof (msg), "Volume statedump successful");
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_str ("volStatedump", msg, rsp.op_ret,
rsp.op_errno, rsp.op_errstr);
@@ -6085,12 +7305,11 @@ gf_cli3_1_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
if (rsp.op_ret)
- cli_err ("%s", msg);
+ cli_err ("volume statedump: failed: %s", msg);
else
- cli_out ("%s", msg);
+ cli_out ("volume statedump: success");
ret = rsp.op_ret;
out:
@@ -6099,7 +7318,7 @@ out:
}
int32_t
-gf_cli3_1_statedump_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_statedump_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -6111,33 +7330,20 @@ gf_cli3_1_statedump_volume (call_frame_t *frame, xlator_t *this,
options = data;
- ret = dict_allocate_and_serialize (options,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATEDUMP_VOLUME, NULL,
- this, gf_cli3_1_statedump_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ ret = cli_to_glusterd (&req, frame, gf_cli_statedump_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_STATEDUMP_VOLUME, this, cli_rpc_prog,
+ NULL);
out:
- if (options)
- dict_destroy (options);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
int32_t
-gf_cli3_1_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
int ret = -1;
@@ -6153,7 +7359,8 @@ gf_cli3_1_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
@@ -6169,7 +7376,6 @@ gf_cli3_1_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
}
-#if (HAVE_LIB_XML)
if (global_state->mode & GLUSTER_MODE_XML) {
ret = cli_xml_output_vol_list (dict, rsp.op_ret, rsp.op_errno,
rsp.op_errstr);
@@ -6178,7 +7384,7 @@ gf_cli3_1_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
"Error outputting to xml");
goto out;
}
-#endif
+
if (rsp.op_ret)
cli_err ("%s", rsp.op_errstr);
else {
@@ -6187,7 +7393,7 @@ gf_cli3_1_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
goto out;
if (vol_count == 0) {
- cli_out ("No volumes present in cluster");
+ cli_err ("No volumes present in cluster");
goto out;
}
for (i = 0; i < vol_count; i++) {
@@ -6208,7 +7414,7 @@ out:
}
int32_t
-gf_cli3_1_list_volume (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_list_volume (call_frame_t *frame, xlator_t *this, void *data)
{
int ret = -1;
gf_cli_req req = {{0,}};
@@ -6218,7 +7424,7 @@ gf_cli3_1_list_volume (call_frame_t *frame, xlator_t *this, void *data)
ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
GLUSTER_CLI_LIST_VOLUME, NULL,
- this, gf_cli3_1_list_volume_cbk,
+ this, gf_cli_list_volume_cbk,
(xdrproc_t)xdr_gf_cli_req);
out:
@@ -6227,7 +7433,7 @@ out:
}
int32_t
-gf_cli3_1_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
+gf_cli_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
int count, void *myframe)
{
gf_cli_rsp rsp = {0,};
@@ -6241,8 +7447,8 @@ gf_cli3_1_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = xdr_to_generic (*iov, &rsp,
(xdrproc_t)xdr_gf_cli_rsp);
if (ret < 0) {
-
- gf_log ("cli", GF_LOG_ERROR, "XDR decoding failed");
+ gf_log (((call_frame_t *) myframe)->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
goto out;
}
gf_log ("cli", GF_LOG_DEBUG, "Received response to clear-locks");
@@ -6253,7 +7459,7 @@ gf_cli3_1_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
} else {
if (!rsp.dict.dict_len) {
- cli_out ("Possibly no locks cleared");
+ cli_err ("Possibly no locks cleared");
ret = 0;
goto out;
}
@@ -6296,12 +7502,14 @@ gf_cli3_1_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
ret = rsp.op_ret;
out:
+ if (dict)
+ dict_unref (dict);
cli_cmd_broadcast_response (ret);
return ret;
}
int32_t
-gf_cli3_1_clearlocks_volume (call_frame_t *frame, xlator_t *this,
+gf_cli_clearlocks_volume (call_frame_t *frame, xlator_t *this,
void *data)
{
gf_cli_req req = {{0,}};
@@ -6313,67 +7521,1147 @@ gf_cli3_1_clearlocks_volume (call_frame_t *frame, xlator_t *this,
options = data;
- ret = dict_allocate_and_serialize (options,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ ret = cli_to_glusterd (&req, frame, gf_cli_clearlocks_volume_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_CLRLOCKS_VOLUME, this, cli_rpc_prog,
+ NULL);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+
+ GF_FREE (req.dict.dict_val);
+ return ret;
+}
+
+/* Print the result of "snapshot delete"; returns 0, op_ret or a lookup error. */
+int32_t
+cli_snapshot_remove_reply (gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
+{
+ int32_t ret = -1;
+ char *snap_name = NULL;
+
+ GF_ASSERT (rsp);
+ GF_ASSERT (dict);
+ GF_ASSERT (frame);
+ /* Failed request: show op_errstr (or a generic hint) and return op_ret. */
+ if (rsp->op_ret) {
+ cli_err("snapshot delete: failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
+ }
+
+ cli_out ("snapshot delete: %s: snap removed successfully",
+ snap_name);
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Print the result of a "snapshot config" request: a confirmation for a set
+ * operation, or the system-wide and per-volume limits for a display request.
+ * Returns 0 on success, rsp->op_ret on a failed request, non-zero otherwise. */
+int
+cli_snapshot_config_display (dict_t *dict, gf_cli_rsp *rsp)
+{
+        char                 buf[PATH_MAX]   = "";
+        char                *volname         = NULL;
+        int                  ret             = -1;
+        int                  config_command  = 0;
+        uint64_t             value           = 0;
+        uint64_t             hard_limit      = 0;
+        uint64_t             soft_limit      = 0;
+        uint64_t             i               = 0;
+        uint64_t             voldisplaycount = 0;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (rsp);
+
+        if (rsp->op_ret) {
+                cli_err ("Snapshot Config : failed: %s",
+                         rsp->op_errstr ? rsp->op_errstr :
+                         "Please check log file for details");
+                ret = rsp->op_ret;
+                goto out;
+        }
+
+        ret = dict_get_int32 (dict, "config-command", &config_command);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not fetch config type");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "volname", &volname);
+        /* Ignore the error, as volname is optional */
+        if (!volname) {
+                volname = "System";
+        }
+
+        ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit);
+        /* Ignore the error, as the key specified is optional */
+        ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit);
+
+        if (!hard_limit && !soft_limit
+            && config_command != GF_SNAP_CONFIG_DISPLAY) {
+                ret = -1;
+                gf_log(THIS->name, GF_LOG_ERROR,
+                       "Could not fetch config-key");
+                goto out;
+        }
+
+        switch (config_command) {
+        case GF_SNAP_CONFIG_TYPE_SET:
+                if (hard_limit && soft_limit) {
+                        cli_out ("snapshot config: snap-max-hard-limit "
+                                 "& snap-max-soft-limit for system set "
+                                 "successfully");
+                } else if (hard_limit){
+                        cli_out ("snapshot config: %s "
+                                 "for snap-max-hard-limit set successfully",
+                                 volname);
+                } else if (soft_limit) {
+                        cli_out ("snapshot config: %s "
+                                 "for snap-max-soft-limit set successfully",
+                                 volname);
+                }
+                break;
+
+        case GF_SNAP_CONFIG_DISPLAY :
+                cli_out ("\nSnapshot System Configuration:");
+                ret = dict_get_uint64 (dict, "snap-max-hard-limit",
+                                       &value);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+                                "snap_max_hard_limit for %s", volname);
+                        ret = -1;
+                        goto out;
+                }
+                cli_out ("snap-max-hard-limit : %"PRIu64, value);
+
+                ret = dict_get_uint64 (dict, "snap-max-soft-limit",
+                                       &soft_limit);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+                                "snap-max-soft-limit for %s", volname);
+                        ret = -1;
+                        goto out;
+                }
+                cli_out ("snap-max-soft-limit : %"PRIu64"%%\n",
+                         soft_limit);
+
+                cli_out ("Snapshot Volume Configuration:");
+                ret = dict_get_uint64 (dict, "voldisplaycount",
+                                       &voldisplaycount);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_ERROR,
+                                "Could not fetch voldisplaycount");
+                        ret = -1;
+                        goto out;
+                }
+                /* i is a uint64_t, so PRIu64 (not %ld) builds the key names. */
+                for (i = 0; i < voldisplaycount; i++) {
+                        snprintf (buf, sizeof(buf), "volume%"PRIu64"-volname", i);
+                        ret = dict_get_str (dict, buf, &volname);
+                        if (ret) {
+                                gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+                                        " %s", buf);
+                                ret = -1;
+                                goto out;
+                        }
+                        cli_out ("\nVolume : %s", volname);
+
+                        snprintf (buf, sizeof(buf),
+                                  "volume%"PRIu64"-snap-max-hard-limit", i);
+                        ret = dict_get_uint64 (dict, buf, &value);
+                        if (ret) {
+                                gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+                                        " %s", buf);
+                                ret = -1;
+                                goto out;
+                        }
+                        cli_out ("snap-max-hard-limit : %"PRIu64, value);
+
+                        snprintf (buf, sizeof(buf),
+                                  "volume%"PRIu64"-active-hard-limit", i);
+                        ret = dict_get_uint64 (dict, buf, &value);
+                        if (ret) {
+                                gf_log ("cli", GF_LOG_ERROR, "Could not fetch"
+                                        " effective snap_max_hard_limit for "
+                                        "%s", volname);
+                                ret = -1;
+                                goto out;
+                        }
+                        cli_out ("Effective snap-max-hard-limit : %"PRIu64,
+                                 value);
+
+                        snprintf (buf, sizeof(buf),
+                                  "volume%"PRIu64"-snap-max-soft-limit", i);
+                        ret = dict_get_uint64 (dict, buf, &value);
+                        if (ret) {
+                                gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+                                        " %s", buf);
+                                ret = -1;
+                                goto out;
+                        }
+                        cli_out ("Effective snap-max-soft-limit : %"PRIu64" "
+                                 "(%"PRIu64"%%)", value, soft_limit);
+                }
+                break;
+        default :
+                break;
+        }
+        ret = 0;
+out:
+        return ret;
+}
+
+/* This function is used to print the volume related information
+ * of a snap.
+ *
+ * arg - 0, dict : Response Dictionary.
+ * arg - 1, prefix str : snaplist.snap{0..}.vol{0..}.*
+ */
+int
+cli_get_each_volinfo_in_snap (dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven) {
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int value = 0;
+ int ret = -1;
+ char indent[5] = "\t";
+ char *volname = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ if (snap_driven) {
+ ret = snprintf (key, sizeof (key), "%s.volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Snap Volume Name", ":", get_buffer);
+
+ ret = snprintf (key, sizeof (key),
+ "%s.origin-volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key, &volname);
+ if (ret) {
+ gf_log ("cli", GF_LOG_WARNING, "Failed to get %s", key);
+ cli_out ("%-12s", "Origin:");
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Origin Volume name", ":", volname);
+
+
+ ret = snprintf (key, sizeof (key), "%s.snapcount",
+ keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s%s %s %s %d", indent, "Snaps taken for",
+ volname, ":", value);
+
+ ret = snprintf (key, sizeof (key), "%s.snaps-available",
+ keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &value);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out ("%s%s %s %s %d", indent, "Snaps available for",
+ volname, ":", value);
+ }
+
+
+ ret = snprintf (key, sizeof (key), "%s.vol-status", keyprefix);
if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to serialize the data");
+ goto out;
+ }
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to get %s", key);
goto out;
}
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Status",
+ ":", get_buffer);
+out :
+ return ret;
+}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_CLRLOCKS_VOLUME, NULL,
- this, gf_cli3_1_clearlocks_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+/* Print the details of every volume that belongs to one snapshot.
+ * arg - 0, dict : Response dictionary.
+ * arg - 1, keyprefix : snapshot key prefix (visible callers pass "snap<N>");
+ *                      reads "<keyprefix>.vol-count" and "<keyprefix>.vol{1..}".
+ * Returns the last lookup/print result (0 when every step succeeds). */
+int
+cli_get_volinfo_in_snap (dict_t *dict, char *keyprefix) {
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ int ret = -1;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ ret = snprintf (key, sizeof (key), "%s.vol-count", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+ /* No error branch: a failed lookup is reported through ret after the loop. */
+ ret = dict_get_int32 (dict, key, &volcount);
+ for (i = 1 ; i <= volcount ; i++) {
+ ret = snprintf (key, sizeof (key),
+ "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap (dict, key, _gf_true);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not list "
+ "details of volume in a snap");
+ goto out;
+ }
+ cli_out (" ");
+ }
+
+out :
+ return ret;
+}
+
+/* Print name, UUID, optional description and creation time of the snapshot
+ * stored under prefix_str; when snap_driven, also print its volumes. */
+int
+cli_get_each_snap_info (dict_t *dict, char *prefix_str,
+ gf_boolean_t snap_driven) {
+ char key_buffer[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int ret = -1;
+ char indent[5] = "";
+
+ GF_ASSERT (dict);
+ GF_ASSERT (prefix_str);
+ /* Output that is not snap-driven is indented by one tab. */
+ if (!snap_driven)
+ strcat (indent, "\t");
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snapname",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snapname %s ",
+ key_buffer);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Snapshot",
+ ":", get_buffer);
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-id",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snap-id %s ",
+ key_buffer);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Snap UUID",
+ ":", get_buffer);
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-desc",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (!ret) {
+ /* Ignore error for description */
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent,
+ "Description", ":", get_buffer);
+ }
+
+ ret = snprintf (key_buffer, sizeof (key_buffer), "%s.snap-time",
+ prefix_str);
+ if (ret < 0 ) {
+ goto out;
+ }
+
+ ret = dict_get_str (dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to fetch snap-time %s ",
+ prefix_str);
+ goto out;
+ }
+ cli_out ("%s" INDENT_MAIN_HEAD "%s", indent, "Created",
+ ":", get_buffer);
+
+ if (snap_driven) {
+ cli_out ("%-12s", "Snap Volumes:\n");
+ ret = cli_get_volinfo_in_snap (dict, prefix_str);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to list details "
+ "of the snaps");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+/* Print the details of every snapshot ("snap1".."snap<snap-count>") in dict.
+ * arg - 0, dict : Response Dictionary
+ * arg - 1, bool_snap_driven : forwarded unchanged to cli_get_each_snap_info */
+int
+cli_call_snapshot_info (dict_t *dict, gf_boolean_t bool_snap_driven) {
+ int snap_count = 0;
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "snap-count", &snap_count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snap-count");
+ goto out;
+ }
+
+ if (snap_count == 0) {
+ cli_out ("No snapshots present");
+ }
+ /* Snapshot keys are numbered from 1. */
+ for (i = 1 ; i <= snap_count ; i++) {
+ ret = snprintf (key, sizeof (key), "snap%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_snap_info (dict, key, bool_snap_driven);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to print snap details");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+/* Print a volume's snapshot summary, then the details of each snapshot. */
+int
+cli_get_snaps_in_volume (dict_t *dict) {
+ int ret = -1;
+ int i = 0;
+ int count = 0;
+ int avail = 0;
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_str (dict, "origin-volname", &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch origin-volname");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%s", "Volume Name", ":", get_buffer);
+ /* Note the names: avail holds "snap-count", count holds "snaps-available". */
+ ret = dict_get_int32 (dict, "snap-count", &avail);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snap-count");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%d", "Snaps Taken", ":", avail);
+
+ ret = dict_get_int32 (dict, "snaps-available", &count);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snaps-available");
+ goto out;
+ }
+ cli_out (INDENT_MAIN_HEAD "%d", "Snaps Available", ":", count);
+
+ for (i = 1 ; i <= avail ; i++) {
+ snprintf (key, sizeof (key), "snap%d", i);
+ ret = cli_get_each_snap_info (dict, key, _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Unable to print snap details");
+ goto out;
+ }
+ /* Only the "vol1" entry of each snapshot is printed here. */
+ ret = snprintf (key, sizeof (key), "snap%d.vol1", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap (dict, key, _gf_false);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get volume "
+ "related information");
+ goto out;
+ }
+ cli_out (" ");
+ }
+out :
+ return ret;
+}
+
+/* Print one snapshot name per line; returns 0 or the failing lookup result. */
+int
+cli_snapshot_list (dict_t *dict) {
+ int snapcount = 0;
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int i = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+
+ ret = dict_get_int32 (dict, "snap-count", &snapcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch snap count");
+ goto out;
+ }
+
+ if (snapcount == 0) {
+ cli_out ("No snapshots present");
+ }
+ /* Names are stored under "snapname1".."snapname<snap-count>". */
+ for (i = 1 ; i <= snapcount ; i++) {
+ ret = snprintf (key, sizeof (key), "snapname%d",i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
+ } else {
+ cli_out ("%s", get_buffer);
+ }
+ }
+out :
+ return ret;
+}
+
+/* Print the status of every brick of the snap volume stored under key_prefix.
+ * Only the brick count is mandatory; missing per-brick details show "N/A". */
+int
+cli_get_snap_volume_status (dict_t *dict, char *key_prefix)
+{
+        int   ret             = -1;
+        char  key[PATH_MAX]   = "";
+        char *buffer          = NULL;
+        int   brickcount      = 0;
+        int   i               = 0;
+        int   pid             = 0;
+
+        GF_ASSERT (dict);
+        GF_ASSERT (key_prefix);
+
+        ret = snprintf (key, sizeof (key), "%s.brickcount", key_prefix);
+        if (ret < 0) {
+                goto out;
+        }
+        ret = dict_get_int32 (dict, key, &brickcount);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Failed to fetch brickcount");
+                goto out;
+        }
+
+        for ( i = 0 ; i < brickcount ; i++ ) {
+                ret = snprintf (key, sizeof (key), "%s.brick%d.path",
+                                key_prefix, i);
+                if (ret < 0) {
+                        goto out;
+                }
+                /* A brick without a path is skipped entirely. */
+                ret = dict_get_str (dict, key, &buffer);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_INFO,
+                                "Unable to get Brick Path");
+                        continue;
+                }
+                cli_out ("\n\t%-17s %s %s", "Brick Path", ":", buffer);
+
+                /* The remaining details are optional: print "N/A" if absent. */
+                ret = snprintf (key, sizeof (key), "%s.brick%d.vgname",
+                                key_prefix, i);
+                if (ret < 0) {
+                        goto out;
+                }
+                ret = dict_get_str (dict, key, &buffer);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_INFO,
+                                "Unable to get Volume Group");
+                        cli_out ("\t%-17s %s %s", "Volume Group", ":", "N/A");
+                } else
+                        cli_out ("\t%-17s %s %s", "Volume Group", ":", buffer);
+
+                ret = snprintf (key, sizeof (key), "%s.brick%d.status",
+                                key_prefix, i);
+                if (ret < 0) {
+                        goto out;
+                }
+                ret = dict_get_str (dict, key, &buffer);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_INFO,
+                                "Unable to get Brick Running");
+                        cli_out ("\t%-17s %s %s", "Brick Running", ":", "N/A");
+                } else
+                        cli_out ("\t%-17s %s %s", "Brick Running", ":", buffer);
+
+                ret = snprintf (key, sizeof (key), "%s.brick%d.pid",
+                                key_prefix, i);
+                if (ret < 0) {
+                        goto out;
+                }
+                ret = dict_get_int32 (dict, key, &pid);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_INFO,
+                                "Unable to get pid");
+                        cli_out ("\t%-17s %s %s", "Brick PID", ":", "N/A");
+                } else
+                        cli_out ("\t%-17s %s %d", "Brick PID", ":", pid);
+
+                ret = snprintf (key, sizeof (key), "%s.brick%d.data",
+                                key_prefix, i);
+                if (ret < 0) {
+                        goto out;
+                }
+                ret = dict_get_str (dict, key, &buffer);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_INFO,
+                                "Unable to get Data Percent");
+                        cli_out ("\t%-17s %s %s", "Data Percentage", ":", "N/A");
+                } else
+                        cli_out ("\t%-17s %s %s", "Data Percentage", ":", buffer);
+
+                ret = snprintf (key, sizeof (key), "%s.brick%d.lvsize",
+                                key_prefix, i);
+                if (ret < 0) {
+                        goto out;
+                }
+                ret = dict_get_str (dict, key, &buffer);
+                if (ret) {
+                        gf_log ("cli", GF_LOG_INFO, "Unable to get LV Size");
+                        cli_out ("\t%-17s %s %s", "LV Size", ":", "N/A");
+                } else
+                        cli_out ("\t%-17s %s %s", "LV Size", ":", buffer);
+        }
+        /* Missing optional details were reported as "N/A"; they are not errors. */
+        ret = 0;
+out :
+        return ret;
+}
+
+
+
+/* Print name and UUID of the snapshot stored under keyprefix, followed by the
+ * brick status of each of its volumes; returns 0 or the first failure. */
+int
+cli_get_single_snap_status (dict_t *dict, char *keyprefix)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (keyprefix);
+
+ ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snapname");
+ goto out;
+ }
+ cli_out ("\nSnap Name : %s", get_buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_str (dict, key, &get_buffer);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get snap UUID");
+ goto out;
+ }
+ cli_out ("Snap UUID : %s", get_buffer);
+
+ ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32 (dict, key, &volcount);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Unable to get volume count");
+ goto out;
+ }
+ /* Volume keys are numbered from 0: "<keyprefix>.vol0", ... */
+ for (i = 0 ; i < volcount ; i++) {
+ ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = cli_get_snap_volume_status (dict, key);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Could not get snap volume status");
+ goto out;
+ }
+ }
+out :
+ return ret;
+}
+
+/* Print status for each "status.snap<N>" in dict; 0 on success, else error. */
+int
+cli_snap_status_all (dict_t *dict) {
+        int  ret           = -1;
+        char key[PATH_MAX] = "";
+        int  snapcount     = 0;
+        int  i             = 0;
+
+        GF_ASSERT (dict);
+
+        ret = dict_get_int32 (dict, "status.snapcount", &snapcount);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Could not get snapcount");
+                goto out;
+        }
+        if (snapcount == 0)
+                cli_out ("No snapshots present");
+        for (i = 0 ; i < snapcount; i++) {
+                ret = snprintf (key, sizeof (key), "status.snap%d", i);
+                if (ret < 0)
+                        goto out;
+                /* Stop at the first failure; a later success must not mask it. */
+                ret = cli_get_single_snap_status (dict, key);
+                if (ret)
+                        goto out;
+        }
+out:
+        return ret;
+}
+
+
+/* Print a "snapshot status" reply according to the "cmd" key of dict. */
+int
+cli_snapshot_status_display (dict_t *dict, gf_cli_rsp *rsp)
+{
+ char key[PATH_MAX] = "";
+ int ret = -1;
+ int status_cmd = -1;
+
+ GF_ASSERT (dict);
+ GF_ASSERT (rsp);
+
+ if (rsp->op_ret) {
+ cli_err ("Snapshot Status : failed: %s",
+ rsp->op_errstr ? rsp->op_errstr :
+ "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+ ret = dict_get_int32 (dict, "cmd", &status_cmd);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch status type");
+ goto out;
+ }
+ switch (status_cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL :
+ {
+ ret = cli_snap_status_all (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of all snap");
+ goto out;
+ }
+ break;
+ }
+ /* A single snapshot is always read from "status.snap0". */
+ case GF_SNAP_STATUS_TYPE_SNAP :
+ {
+ ret = snprintf (key, sizeof (key), "status.snap0");
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_single_snap_status (dict, key);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of snap");
+ goto out;
+ }
+ break;
+ }
+ /* Per-volume status is printed by the same helper as the "all" case. */
+ case GF_SNAP_STATUS_TYPE_VOL :
+ {
+ ret = cli_snap_status_all (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Could not fetch "
+ "status of snap in a volume");
+ goto out;
+ }
+ break;
+ }
+ default :
+ break;
+ }
+out :
+ return ret;
+}
+
+/* RPC callback for snapshot commands: decode the reply, render it according
+ * to the "type" key of the response dict and broadcast the result code. */
+int
+gf_cli_snapshot_cbk (struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {0, };
+ dict_t *dict = NULL;
+ char *snap_name = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t snap_driven = _gf_false;
+
+ if (req->rpc_status == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log (frame->this->name, GF_LOG_ERROR,
+ "Failed to decode xdr response");
+ goto out;
+ }
+
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+ /* The "type" key selects how the reply is rendered. */
+ ret = dict_get_int32 (dict, "type", &type);
+ if (ret) {
+ gf_log (frame->this->name, GF_LOG_ERROR, "failed to get type");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot create: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to get snap name");
+ goto out;
+ }
+ cli_out ("snapshot create: %s: snap created successfully",
+ snap_name);
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ /* TODO: Check if rsp.op_ret needs to be checked here. Or is
+ * it ok to check this in the start of the function where we
+ * get rsp.*/
+ if (rsp.op_ret) {
+ cli_err("snapshot restore: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str (dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to get snap name");
+ goto out;
+ }
+
+ cli_out ("Snapshot restore: %s: Snap restored "
+ "successfully", snap_name);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_INFO:
+ if (rsp.op_ret) {
+ cli_err ("Snapshot info : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ snap_driven = dict_get_str_boolean (dict, "snap-driven",
+ _gf_false);
+ if (snap_driven == _gf_true) {
+ ret = cli_call_snapshot_info (dict, snap_driven);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Snapshot info failed");
+ goto out;
+ }
+ } else if (snap_driven == _gf_false) {
+ ret = cli_get_snaps_in_volume (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Snapshot info failed");
+ goto out;
+ }
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = cli_snapshot_config_display (dict, &rsp);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot config output.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ if (rsp.op_ret) {
+ cli_err ("Snapshot list : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr :
+ "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = cli_snapshot_list (dict);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot list");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = cli_snapshot_remove_reply (&rsp, dict, frame);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR,
+ "Failed to delete snap");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = cli_snapshot_status_display (dict, &rsp);
+ if (ret) {
+ gf_log ("cli", GF_LOG_ERROR, "Failed to display "
+ "snapshot status output.");
+ goto out;
+ }
+ break;
+
+ default:
+ cli_err ("Unknown command executed");
+ ret = -1;
+ goto out;
+ }
+out:
+ if (dict)
+ dict_unref (dict);
+ cli_cmd_broadcast_response (ret);
+ /* The decoded response buffers are released with plain free(). */
+ free (rsp.dict.dict_val);
+ free (rsp.op_errstr);
+
+ return ret;
+}
+
+int32_t
+gf_cli_snapshot (call_frame_t *frame, xlator_t *this,
+ void *data)
+{
+ gf_cli_req req = {{0,}};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ options = data;
+
+ ret = cli_to_glusterd (&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t) xdr_gf_cli_req, options,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog,
+ NULL);
out:
- if (options)
- dict_destroy (options);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
return ret;
}
+/* Join the CLI words of frame->local into "cmd-str", store it in dict, then
+ * serialize dict into req and submit it as procnum; returns 0 or non-zero. */
+int
+cli_to_glusterd (gf_cli_req *req, call_frame_t *frame,
+                 fop_cbk_fn_t cbkfn, xdrproc_t xdrproc, dict_t *dict,
+                 int procnum, xlator_t *this, rpc_clnt_prog_t *prog,
+                 struct iobref *iobref)
+{
+        int           ret   = 0;
+        size_t        len   = 0;
+        size_t        wlen  = 0;
+        char         *cmd   = NULL;
+        char         *pos   = NULL;
+        int           i     = 0;
+        const char  **words = NULL;
+        cli_local_t  *local = NULL;
+
+        if (!this || !frame || !dict || !frame->local) {
+                ret = -1;
+                goto out;
+        }
+
+        local = frame->local;
+        if (!local->words) {
+                ret = -1;
+                goto out;
+        }
+
+        words = local->words;
+
+        /* Every word needs its bytes plus one separator or the final NUL. */
+        while (words[i])
+                len += strlen (words[i++]) + 1;
+        if (len == 0)
+                len = 1; /* empty word list: keep room for the NUL */
+
+        cmd = GF_CALLOC (1, len, gf_common_mt_char);
+        if (!cmd) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Copy at a moving cursor instead of rescanning cmd with strncat. */
+        pos = cmd;
+        for (i = 0; words[i]; i++) {
+                wlen = strlen (words[i]);
+                memcpy (pos, words[i], wlen);
+                pos += wlen;
+                if (words[i+1] != NULL)
+                        *pos++ = ' ';
+        }
+        *pos = '\0';
+
+        ret = dict_set_dynstr (dict, "cmd-str", cmd);
+        if (ret)
+                goto out;
+
+        ret = dict_allocate_and_serialize (dict, &(req->dict).dict_val,
+                                           &(req->dict).dict_len);
+        if (ret < 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "failed to get serialized length of dict");
+                goto out;
+        }
+
+        ret = cli_cmd_submit (req, frame, prog, procnum, iobref, this,
+                              cbkfn, (xdrproc_t) xdrproc);
+out:
+        return ret;
+}
+
struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
[GLUSTER_CLI_NULL] = {"NULL", NULL },
- [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli3_1_probe},
- [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli3_1_deprobe},
- [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli3_1_list_friends},
- [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli3_1_create_volume},
- [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli3_1_delete_volume},
- [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli3_1_start_volume},
- [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli3_1_stop_volume},
- [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli3_1_rename_volume},
- [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli3_1_defrag_volume},
- [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli3_1_get_volume},
- [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli3_1_get_next_volume},
- [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli3_1_set_volume},
- [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli3_1_add_brick},
- [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli3_1_remove_brick},
- [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli3_1_replace_brick},
- [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli3_1_log_rotate},
- [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli3_1_getspec},
- [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli3_1_pmap_b2p},
- [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli3_1_sync_volume},
- [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli3_1_reset_volume},
- [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli3_1_fsm_log},
- [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli3_1_gsync_set},
- [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli3_1_profile_volume},
- [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli3_1_quota},
- [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli3_1_top_volume},
- [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli3_1_getwd},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli3_1_status_volume},
+ [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli_probe},
+ [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli_deprobe},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli_list_friends},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", gf_cli3_1_uuid_reset},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", gf_cli3_1_uuid_get},
+ [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli_create_volume},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli_delete_volume},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli_start_volume},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli_stop_volume},
+ [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli_rename_volume},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli_defrag_volume},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli_get_volume},
+ [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli_get_next_volume},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli_set_volume},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli_add_brick},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli_remove_brick},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli_replace_brick},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli_log_rotate},
+ [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli_getspec},
+ [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli_pmap_b2p},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli_sync_volume},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli_reset_volume},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli_fsm_log},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli_gsync_set},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli_profile_volume},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli_quota},
+ [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli_top_volume},
+ [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli_getwd},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli_status_volume},
[GLUSTER_CLI_STATUS_ALL] = {"STATUS_ALL", gf_cli_status_volume_all},
- [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli3_1_mount},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount},
- [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli3_1_heal_volume},
- [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", gf_cli3_1_statedump_volume},
- [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli3_1_list_volume},
- [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", gf_cli3_1_clearlocks_volume},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli_mount},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli_umount},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli_heal_volume},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", gf_cli_statedump_volume},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli_list_volume},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", gf_cli_clearlocks_volume},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", gf_cli_copy_file},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec},
+ [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot},
};
struct rpc_clnt_program cli_prog = {
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index 702a7f7fe..d8884d44b 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdlib.h>
#include "cli.h"
#include "cli1-xdr.h"
@@ -25,6 +15,39 @@
#include "syscall.h"
+enum gf_task_types {
+ GF_TASK_TYPE_REBALANCE,
+ GF_TASK_TYPE_REMOVE_BRICK
+};
+
+/*
+ * IMPORTANT NOTE:
+ * All exported functions in this file which use libxml need to use a
+ * #if (HAVE_LIB_XML), #else, #endif
+ * For eg,
+ * int exported_func () {
+ * #if (HAVE_LIB_XML)
+ * <Stuff using libxml>
+ * #else
+ * return 0;
+ * #endif
+ * }
+ *
+ * All other functions, which are called internally within this file need to be
+ * within #if (HAVE_LIB_XML), #endif statements
+ * For eg,
+ * #if (HAVE_LIB_XML)
+ * int internal_func ()
+ * {
+ * }
+ * #endif
+ *
+ * Following the above format ensures that all xml related code is compiled
+ * only when libxml2 is present, and also keeps the rest of the codebase free
+ * of #if (HAVE_LIB_XML)
+ */
+
+
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
@@ -40,18 +63,11 @@
}while (0) \
int
-cli_begin_xml_output (xmlTextWriterPtr *writer, xmlBufferPtr *buf)
+cli_begin_xml_output (xmlTextWriterPtr *writer, xmlDocPtr *doc)
{
int ret = -1;
- *buf = xmlBufferCreateSize (8192);
- if (buf == NULL) {
- ret = -1;
- goto out;
- }
- xmlBufferSetAllocationScheme (*buf, XML_BUFFER_ALLOC_DOUBLEIT);
-
- *writer = xmlNewTextWriterMemory (*buf, 0);
+ *writer = xmlNewTextWriterDoc (doc, 0);
if (writer == NULL) {
ret = -1;
goto out;
@@ -70,7 +86,7 @@ out:
}
int
-cli_end_xml_output (xmlTextWriterPtr writer, xmlBufferPtr buf)
+cli_end_xml_output (xmlTextWriterPtr writer, xmlDocPtr doc)
{
int ret = -1;
@@ -81,10 +97,12 @@ cli_end_xml_output (xmlTextWriterPtr writer, xmlBufferPtr buf)
ret = xmlTextWriterEndDocument (writer);
XML_RET_CHECK_AND_GOTO (ret, out);
- cli_out ("%s", (const char *)buf->content);
+
+ /* Dump xml document to stdout and pretty format it */
+ xmlSaveFormatFileEnc ("-", doc, "UTF-8", 1);
xmlFreeTextWriter (writer);
- xmlBufferFree (buf);
+ xmlFreeDoc (doc);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
@@ -106,23 +124,25 @@ cli_xml_output_common (xmlTextWriterPtr writer, int op_ret, int op_errno,
XML_RET_CHECK_AND_GOTO (ret, out);
ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrstr",
- "%s", op_errstr);
+ "%s", op_errstr);
XML_RET_CHECK_AND_GOTO (ret, out);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+#endif
int
cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
char *op_errstr)
{
+#if (HAVE_LIB_XML)
int ret = -1;
xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+ xmlDocPtr doc = NULL;
- ret = cli_begin_xml_output (&writer, &buf);
+ ret = cli_begin_xml_output (&writer, &doc);
if (ret)
goto out;
@@ -130,22 +150,32 @@ cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"cliOp",
- "%s", op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (op) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"cliOp",
+ "%s", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"output",
- "%s", str);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (str) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"output",
+ "%s", str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output (writer, doc);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
-void
+#if (HAVE_LIB_XML)
+int
cli_xml_output_data_pair (dict_t *this, char *key, data_t *value,
void *data)
{
@@ -157,36 +187,48 @@ cli_xml_output_data_pair (dict_t *this, char *key, data_t *value,
ret = xmlTextWriterWriteFormatElement (*writer, (xmlChar *)key,
"%s", value->data);
- return;
+ return ret;
}
+#endif
int
cli_xml_output_dict ( char *op, dict_t *dict, int op_ret, int op_errno,
char *op_errstr)
{
+#if (HAVE_LIB_XML)
int ret = -1;
xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
- ret = cli_begin_xml_output (&writer, &buf);
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
if (ret)
goto out;
+
/* <"op"> */
ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
XML_RET_CHECK_AND_GOTO (ret, out);
- dict_foreach (dict, cli_xml_output_data_pair, &writer);
+ if (dict)
+ dict_foreach (dict, cli_xml_output_data_pair, &writer);
/* </"op"> */
ret = xmlTextWriterEndElement (writer);
XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output (writer, doc);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
int brick_index, int *online,
@@ -222,15 +264,6 @@ cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.port", brick_index);
- ret = dict_get_int32 (dict, key, &port);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"port",
- "%d", port);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "brick%d.status", brick_index);
ret = dict_get_int32 (dict, key, &status);
if (ret)
@@ -241,6 +274,27 @@ cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
*online = status;
memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "brick%d.port", brick_index);
+ ret = dict_get_int32 (dict, key, &port);
+ if (ret)
+ goto out;
+
+ /* If the process is either offline or doesn't provide a port (shd)
+ * port = "N/A"
+ * else print the port number of the process.
+ */
+
+ if (*online == 1 && port != 0)
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"port",
+ "%d", port);
+ else
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"port",
+ "%s", "N/A");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "brick%d.pid", brick_index);
ret = dict_get_int32 (dict, key, &pid);
if (ret)
@@ -1277,14 +1331,276 @@ out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+#endif
int
-cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_vol_status_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = cli_begin_xml_output (&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_common (local->writer, op_ret, op_errno,
+ op_errstr);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volStatus> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *) "volStatus");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <volumes> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_status_end (cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ /* </volumes> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </volStatus> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output (local->writer, local->doc);
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_remove_brick_task_params (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {0,};
+ int count = 0;
+ int i = 0;
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.count", prefix);
+ ret = dict_get_int32 (dict, key, &count);
+ if (ret)
+ goto out;
+
+ for (i = 1; i <= count; i++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.brick%d", prefix, i);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"brick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ brick = NULL;
+ }
+
+ /* </params> */
+ ret = xmlTextWriterEndElement (writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_replace_brick_task_params (xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+
+ int ret = -1;
+ char key[1024] = {0,};
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ snprintf (key, sizeof (key), "%s.src-brick", prefix);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"srcBrick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "%s.dst-brick", prefix);
+ ret = dict_get_str (dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"dstBrick",
+ "%s", brick);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ /* </params> */
+ ret = xmlTextWriterEndElement (writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_tasks (cli_local_t *local, dict_t *dict) {
+ int ret = -1;
+ char *task_type = NULL;
+ char *task_id_str = NULL;
+ int status = 0;
+ int tasks = 0;
+ char key[1024] = {0,};
+ int i = 0;
+
+ /* <tasks> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"tasks");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_int32 (dict, "tasks", &tasks);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < tasks; i++) {
+ /* <task> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"task");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.type", i);
+ ret = dict_get_str (dict, key, &task_type);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"type",
+ "%s", task_type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.id", i);
+ ret = dict_get_str (dict, key, &task_id_str);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"status",
+ "%d", status);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ if (!strcmp (task_type, "Replace brick")) {
+ if (status) {
+ status = GF_DEFRAG_STATUS_COMPLETE;
+ } else {
+ status = GF_DEFRAG_STATUS_STARTED;
+ }
+ }
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"statusStr",
+ "%s",
+ cli_vol_task_status_str[status]);
+
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "task%d", i);
+ if (!strcmp (task_type, "Replace brick")) {
+ ret = cli_xml_output_replace_brick_task_params
+ (local->writer, dict, key);
+ if (ret)
+ goto out;
+ } else if (!strcmp (task_type, "Remove brick")) {
+ ret = cli_xml_output_remove_brick_task_params
+ (local->writer, dict, key);
+ if (ret)
+ goto out;
+ }
+
+
+ /* </task> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* </tasks> */
+ ret = xmlTextWriterEndElement (local->writer);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_tasks_detail (cli_local_t *local, dict_t *dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+
+ /*<volume>*/
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"volName", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = cli_xml_output_vol_status_tasks (local, dict);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_status (cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
char *volname = NULL;
int brick_count = 0;
int brick_index_max = -1;
@@ -1295,30 +1611,25 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
gf_boolean_t node_present = _gf_true;
int i;
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
-
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
- /* <volStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volStatus");
+ /* <volume> */
+ ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volume");
XML_RET_CHECK_AND_GOTO (ret, out);
ret = dict_get_str (dict, "volname", &volname);
if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"volName",
- "%s", volname);
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"volName", "%s",
+ volname);
XML_RET_CHECK_AND_GOTO (ret, out);
ret = dict_get_int32 (dict, "count", &brick_count);
if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nodeCount",
- "%d", brick_count);
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"nodeCount", "%d",
+ brick_count);
if (ret)
goto out;
@@ -1337,10 +1648,11 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
for (i = 0; i <= index_max; i++) {
/* <node> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"node");
XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_xml_output_vol_status_common (writer, dict, i,
+ ret = cli_xml_output_vol_status_common (local->writer, dict, i,
&online, &node_present);
if (ret) {
if (node_present)
@@ -1351,7 +1663,7 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
switch (cmd & GF_CLI_STATUS_MASK) {
case GF_CLI_STATUS_DETAIL:
- ret = cli_xml_output_vol_status_detail (writer,
+ ret = cli_xml_output_vol_status_detail (local->writer,
dict, i);
if (ret)
goto out;
@@ -1360,7 +1672,7 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
case GF_CLI_STATUS_MEM:
if (online) {
ret = cli_xml_output_vol_status_mem
- (writer, dict, i);
+ (local->writer, dict, i);
if (ret)
goto out;
}
@@ -1369,7 +1681,7 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
case GF_CLI_STATUS_CLIENTS:
if (online) {
ret = cli_xml_output_vol_status_clients
- (writer, dict, i);
+ (local->writer, dict, i);
if (ret)
goto out;
}
@@ -1378,7 +1690,7 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
case GF_CLI_STATUS_INODE:
if (online) {
ret = cli_xml_output_vol_status_inode
- (writer, dict, i);
+ (local->writer, dict, i);
if (ret)
goto out;
}
@@ -1387,7 +1699,7 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
case GF_CLI_STATUS_FD:
if (online) {
ret = cli_xml_output_vol_status_fd
- (writer, dict, i);
+ (local->writer, dict, i);
if (ret)
goto out;
}
@@ -1396,47 +1708,54 @@ cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
case GF_CLI_STATUS_CALLPOOL:
if (online) {
ret = cli_xml_output_vol_status_callpool
- (writer, dict, i);
+ (local->writer, dict, i);
if (ret)
goto out;
}
break;
-
default:
break;
}
/* </node> */
- ret = xmlTextWriterEndElement (writer);
+ ret = xmlTextWriterEndElement (local->writer);
XML_RET_CHECK_AND_GOTO (ret, out);
}
- /* </volStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* Tasks are only present when a normal volume status call is done on a
+ * single volume or on all volumes
+ */
+ if (((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) &&
+ (cmd & (GF_CLI_STATUS_VOL|GF_CLI_STATUS_ALL))) {
+ ret = cli_xml_output_vol_status_tasks (local, dict);
+ if (ret)
+ goto out;
+ }
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- ret = cli_end_xml_output (writer, buf);
- if (ret)
- goto out;
+ /* </volume> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
cli_xml_output_vol_top_rw_perf (xmlTextWriterPtr writer, dict_t *dict,
int brick_index, int member_index)
{
- int ret = -1;
- char *filename = NULL;
- uint64_t throughput = 0;
- long int time_sec = 0;
- long int time_usec = 0;
- struct tm *tm = NULL;
- char timestr[256] = {0,};
- char key[1024] = {0,};
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t throughput = 0;
+ long int time_sec = 0;
+ long int time_usec = 0;
+ char timestr[256] = {0,};
+ char key[1024] = {0,};
/* <file> */
ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
@@ -1474,14 +1793,9 @@ cli_xml_output_vol_top_rw_perf (xmlTextWriterPtr writer, dict_t *dict,
if (ret)
goto out;
- tm = localtime (&time_sec);
- if (!tm) {
- ret = -1;
- goto out;
- }
- strftime (timestr, sizeof (timestr), "%Y-%m-%d %H:%M:%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, time_sec, gf_timefmt_FT);
snprintf (timestr + strlen (timestr),
- sizeof (timestr) - strlen (timestr),
+ sizeof timestr - strlen (timestr),
".%"GF_PRI_SUSECONDS, time_usec);
ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"time",
"%s", timestr);
@@ -1535,14 +1849,16 @@ out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+#endif
int
cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
char *op_errstr)
{
+#if (HAVE_LIB_XML)
int ret = -1;
xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+ xmlDocPtr doc = NULL;
int brick_count = 0;
int top_op = GF_CLI_TOP_NONE;
char *brick_name = NULL;
@@ -1556,7 +1872,7 @@ cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
int i = 0;
int j = 0;
- ret = cli_begin_xml_output (&writer, &buf);
+ ret = cli_begin_xml_output (&writer, &doc);
if (ret)
goto out;
@@ -1645,8 +1961,6 @@ cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
case GF_CLI_TOP_WRITE:
case GF_CLI_TOP_OPENDIR:
case GF_CLI_TOP_READDIR:
- if (!members)
- continue;
break;
@@ -1672,9 +1986,6 @@ cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
"%f", time_taken);
}
- if (!members)
- continue;
-
break;
default:
@@ -1704,13 +2015,17 @@ cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
/* </volTop> */
ret = xmlTextWriterEndElement (writer);
XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output (writer, doc);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
cli_xml_output_vol_profile_stats (xmlTextWriterPtr writer, dict_t *dict,
int brick_index, int interval)
@@ -1886,14 +2201,16 @@ out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+#endif
int
cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
char *op_errstr)
{
+#if (HAVE_LIB_XML)
int ret = -1;
xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+ xmlDocPtr doc = NULL;
char *volname = NULL;
int op = GF_CLI_STATS_NONE;
int brick_count = 0;
@@ -1902,7 +2219,7 @@ cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
char key[1024] = {0,};
int i = 0;
- ret = cli_begin_xml_output (&writer, &buf);
+ ret = cli_begin_xml_output (&writer, &doc);
if (ret)
goto out;
@@ -1982,26 +2299,29 @@ cont:
ret = xmlTextWriterEndElement (writer);
XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output (writer, doc);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
-
+#else
+ return 0;
+#endif
}
int
cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
char *op_errstr)
{
+#if (HAVE_LIB_XML)
int ret = -1;
xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+ xmlDocPtr doc = NULL;
int count = 0;
char *volname = NULL;
char key[1024] = {0,};
int i = 0;
- ret = cli_begin_xml_output (&writer, &buf);
+ ret = cli_begin_xml_output (&writer, &doc);
if (ret)
goto out;
@@ -2036,12 +2356,16 @@ cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
ret = xmlTextWriterEndElement (writer);
XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output (writer, doc);
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
cli_xml_output_vol_info_option (xmlTextWriterPtr writer, char *substr,
char *optstr, char *valstr)
@@ -2086,23 +2410,44 @@ out:
return ret;
}
+struct tmp_xml_option_logger {
+ char *key;
+ xmlTextWriterPtr writer;
+};
+
+static int
+_output_vol_info_option (dict_t *d, char *k, data_t *v,
+ void *data)
+{
+ int ret = 0;
+ char *ptr = NULL;
+ struct tmp_xml_option_logger *tmp = NULL;
+
+ tmp = data;
+
+ ptr = strstr (k, "option.");
+ if (!ptr)
+ goto out;
+
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ ret = cli_xml_output_vol_info_option (tmp->writer, tmp->key, k,
+ v->data);
+
+out:
+ return ret;
+}
+
int
cli_xml_output_vol_info_options (xmlTextWriterPtr writer, dict_t *dict,
char *prefix)
{
int ret = -1;
int opt_count = 0;
- data_pair_t *pairs = 0;
- data_t *value = 0;
- char *ptr = NULL;
char key[1024] = {0,};
- int i = 0;
-
- pairs = dict->members_list;
- if (!pairs) {
- ret = -1;
- goto out;
- }
+ struct tmp_xml_option_logger tmp = {0,};
snprintf (key, sizeof (key), "%s.opt_count", prefix);
ret = dict_get_int32 (dict, key, &opt_count);
@@ -2115,25 +2460,14 @@ cli_xml_output_vol_info_options (xmlTextWriterPtr writer, dict_t *dict,
/* <options> */
ret = xmlTextWriterStartElement (writer, (xmlChar *)"options");
XML_RET_CHECK_AND_GOTO (ret, out);
- while (i < opt_count) {
- snprintf (key, sizeof (key), "%s.option.", prefix);
- while (pairs) {
- ptr = strstr (pairs->key, "option.");
- if (ptr) {
- value = pairs->value;
- if (!value) {
- ret = -1;
- goto out;
- }
- ret = cli_xml_output_vol_info_option
- (writer, key, pairs->key, value->data);
- if (ret)
- goto out;
- }
- pairs = pairs->next;
- }
- i++;
- }
+ snprintf (key, sizeof (key), "%s.option.", prefix);
+
+ tmp.key = key;
+ tmp.writer = writer;
+ ret = dict_foreach (dict, _output_vol_info_option, &tmp);
+ if (ret)
+ goto out;
+
/* </options> */
ret = xmlTextWriterEndElement (writer);
XML_RET_CHECK_AND_GOTO (ret, out);
@@ -2141,14 +2475,17 @@ out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
+#endif
int
cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
{
+#if (HAVE_LIB_XML)
int ret = 0;
int count = 0;
char *volname = NULL;
char *volume_id = NULL;
+ char *uuid = NULL;
int type = 0;
int status = 0;
int brick_count = 0;
@@ -2160,7 +2497,9 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
char key[1024] = {0,};
int i = 0;
int j = 1;
-
+ char *caps = NULL;
+ int k __attribute__((unused)) = 0;
+ char *snap_volume = NULL;
ret = dict_get_int32 (dict, "count", &count);
if (ret)
@@ -2193,23 +2532,30 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.type", i);
- ret = dict_get_int32 (dict, key, &type);
+ snprintf (key, sizeof (key), "volume%d.status", i);
+ ret = dict_get_int32 (dict, key, &status);
if (ret)
goto out;
ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"type",
- "%d", type);
+ (xmlChar *)"status",
+ "%d", status);
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.status", i);
- ret = dict_get_int32 (dict, key, &status);
+ snprintf (key, sizeof (key), "volume%d.snap_volume", i);
+ ret = dict_get_str (dict, key, &snap_volume);
if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"status",
- "%d", status);
+ if (snap_volume) {
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"snapVol",
+ "%s", snap_volume);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret =xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"statusStr", "%s",
+ cli_vol_status_str[status]);
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
@@ -2253,6 +2599,25 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
XML_RET_CHECK_AND_GOTO (ret, out);
memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.type", i);
+ ret = dict_get_int32 (dict, key, &type);
+ if (ret)
+ goto out;
+ /* For Distributed-(stripe,replicate,stripe-replicate) types */
+ if ((type > 0) && (dist_count < brick_count))
+ type += 3;
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"type",
+ "%d", type);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement (local->writer,
+ (xmlChar *)"typeStr",
+ "%s",
+ cli_vol_type_str[type]);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.transport", i);
ret = dict_get_int32 (dict, key, &transport);
if (ret)
@@ -2262,20 +2627,95 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
"%d", transport);
XML_RET_CHECK_AND_GOTO (ret, out);
+#ifdef HAVE_BD_XLATOR
+ /* <xlators> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"xlators");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ for (k = 0; ; k++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),"volume%d.xlator%d", i, k);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+
+ /* <xlator> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)"xlator");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"name", "%s", caps);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* <capabilities> */
+ ret = xmlTextWriterStartElement (local->writer,
+ (xmlChar *)
+ "capabilities");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ j = 0;
+ for (j = 0; ;j++) {
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key),
+ "volume%d.xlator%d.caps%d", i, k, j);
+ ret = dict_get_str (dict, key, &caps);
+ if (ret)
+ break;
+ ret = xmlTextWriterWriteFormatElement
+ (local->writer, (xmlChar *)"capability",
+ "%s", caps);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ /* </capabilities> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </xlator> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ ret = xmlTextWriterFullEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </xlators> */
+#else
+ caps = 0; /* Avoid compiler warnings when BD not enabled */
+#endif
+ j = 1;
+
/* <bricks> */
ret = xmlTextWriterStartElement (local->writer,
(xmlChar *)"bricks");
XML_RET_CHECK_AND_GOTO (ret, out);
while (j <= brick_count) {
+ ret = xmlTextWriterStartElement
+ (local->writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ memset (key, 0, sizeof (key));
+ snprintf (key, sizeof (key), "volume%d.brick%d.uuid",
+ i, j);
+ ret = dict_get_str (dict, key, &uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatAttribute
+ (local->writer, (xmlChar *)"uuid", "%s",
+ uuid);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
memset (key, 0, sizeof (key));
snprintf (key, sizeof (key), "volume%d.brick%d", i, j);
ret = dict_get_str (dict, key, &brick);
if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement
- (local->writer, (xmlChar *)"brick", "%s",
- brick);
+ ret = xmlTextWriterWriteFormatString
+ (local->writer, "%s", brick);
XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement (local->writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
j++;
}
/* </bricks> */
@@ -2293,24 +2733,30 @@ cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
ret = xmlTextWriterEndElement (local->writer);
XML_RET_CHECK_AND_GOTO (ret, out);
}
- GF_FREE (local->get_vol.volname);
- local->get_vol.volname = gf_strdup (volname);
- local->vol_count += count;
+ if (volname) {
+ GF_FREE (local->get_vol.volname);
+ local->get_vol.volname = gf_strdup (volname);
+ local->vol_count += count;
+ }
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
int
cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
char *op_errstr)
{
+#if (HAVE_LIB_XML)
int ret = -1;
GF_ASSERT (local);
- ret = cli_begin_xml_output (&(local->writer), &(local->buf));
+ ret = cli_begin_xml_output (&(local->writer), &(local->doc));
if (ret)
goto out;
@@ -2333,11 +2779,15 @@ cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
out:
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
int
cli_xml_output_vol_info_end (cli_local_t *local)
{
+#if (HAVE_LIB_XML)
int ret = -1;
GF_ASSERT (local);
@@ -2354,11 +2804,14 @@ cli_xml_output_vol_info_end (cli_local_t *local)
ret = xmlTextWriterEndElement (local->writer);
XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (local->writer, local->buf);
+ ret = cli_end_xml_output (local->writer, local->doc);
out:
gf_log ("cli", GF_LOG_ERROR, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
int
@@ -2366,9 +2819,10 @@ cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
int op_ret, int op_errno,
char *op_errstr)
{
+#if (HAVE_LIB_XML)
int ret = -1;
xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+ xmlDocPtr doc = NULL;
int64_t size = 0;
int64_t limit_value = 0;
int i = 0;
@@ -2386,7 +2840,7 @@ cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
GF_ASSERT (volname);
GF_ASSERT (limit_list);
- ret = cli_begin_xml_output (&writer, &buf);
+ ret = cli_begin_xml_output (&writer, &doc);
if (ret)
goto out;
@@ -2496,13 +2950,823 @@ cont:
ret = xmlTextWriterEndElement (writer);
XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ GF_FREE (size_str);
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+/*
+ * Render the peer list held in @dict as an XML document.
+ *
+ * After the common header (op_ret / op_errno / op_errstr) a <peerStatus>
+ * element is written. It holds one <peer> per entry, numbered from 1 up to
+ * the dict's "count", built from the mandatory "friend<N>.uuid",
+ * "friend<N>.hostname" and "friend<N>.connected" keys plus the optional
+ * "friend<N>.stateId" and "friend<N>.state" keys. A NULL @dict produces an
+ * empty <peerStatus> element.
+ *
+ * Returns the status of the step that failed, otherwise the result of
+ * cli_end_xml_output(). Builds without HAVE_LIB_XML always return 0.
+ */
+int
+cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
+                            char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+        xmlTextWriterPtr        writer          = NULL;
+        xmlDocPtr               doc             = NULL;
+        char                    key[1024]       = {0,};
+        char                   *peer_uuid       = NULL;
+        char                   *peer_host       = NULL;
+        char                   *state_name      = NULL;
+        int                     is_connected    = 0;
+        int                     state_code      = 0;
+        int                     npeers          = 0;
+        int                     idx             = 0;
+        int                     ret             = -1;
+
+        ret = cli_begin_xml_output (&writer, &doc);
+        if (ret)
+                goto out;
+
+        ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+        if (ret)
+                goto out;
+
+        /* <peerStatus> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)"peerStatus");
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        if (dict) {
+                ret = dict_get_int32 (dict, "count", &npeers);
+                if (ret)
+                        goto out;
+        }
+
+        /* npeers is still 0 when there is no dict, so no <peer> is written */
+        for (idx = 1; idx <= npeers; idx++) {
+                /* <peer> */
+                ret = xmlTextWriterStartElement (writer, (xmlChar *)"peer");
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                snprintf (key, sizeof (key), "friend%d.uuid", idx);
+                ret = dict_get_str (dict, key, &peer_uuid);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"uuid",
+                                                       "%s", peer_uuid);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                snprintf (key, sizeof (key), "friend%d.hostname", idx);
+                ret = dict_get_str (dict, key, &peer_host);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"hostname",
+                                                       "%s", peer_host);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                snprintf (key, sizeof (key), "friend%d.connected", idx);
+                ret = dict_get_int32 (dict, key, &is_connected);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"connected",
+                                                       "%d", is_connected);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                /* Optional: <state> is emitted only if the key exists */
+                snprintf (key, sizeof (key), "friend%d.stateId", idx);
+                ret = dict_get_int32 (dict, key, &state_code);
+                if (ret == 0) {
+                        ret = xmlTextWriterWriteFormatElement (writer,
+                                        (xmlChar *)"state", "%d", state_code);
+                        XML_RET_CHECK_AND_GOTO (ret, out);
+                }
+
+                /* Optional: <stateStr> is emitted only if the key exists */
+                snprintf (key, sizeof (key), "friend%d.state", idx);
+                ret = dict_get_str (dict, key, &state_name);
+                if (ret == 0) {
+                        ret = xmlTextWriterWriteFormatElement (writer,
+                                        (xmlChar *)"stateStr", "%s",
+                                        state_name);
+                        XML_RET_CHECK_AND_GOTO (ret, out);
+                }
+
+                /* </peer> */
+                ret = xmlTextWriterEndElement (writer);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+        }
+
+        /* </peerStatus> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = cli_end_xml_output (writer, doc);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+#else
+        return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+/*
+ * Map a task status code to its display string. A negative code (for
+ * example the aggregate status when no node was reported) must not be used
+ * as an index into cli_vol_task_status_str[].
+ * NOTE(review): the table's length is not visible from this file, so only
+ * the lower bound is enforced here.
+ */
+static const char *
+cli_xml_task_status_str (int status)
+{
+        if (status < 0)
+                return "unknown";
+
+        return cli_vol_task_status_str[status];
+}
+
+/*
+ * Look up the uint64 stored under "<prefix>-<idx>" in @dict, write it as
+ * <elem>value</elem> and add it to *total. Returns 0 on success.
+ */
+static int
+cli_xml_output_rebalance_counter (xmlTextWriterPtr writer, dict_t *dict,
+                                  const char *prefix, int idx,
+                                  const char *elem, uint64_t *total)
+{
+        char            key[1024] = {0,};
+        uint64_t        value     = 0;
+        int             ret       = -1;
+
+        snprintf (key, sizeof (key), "%s-%d", prefix, idx);
+        ret = dict_get_uint64 (dict, key, &value);
+        if (ret)
+                goto out;
+
+        *total += value;
+
+        ret = xmlTextWriterWriteFormatElement (writer, (const xmlChar *)elem,
+                                               "%"PRIu64, value);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+        return ret;
+}
+
+/*
+ * Used for rebalance stop/status, remove-brick status.
+ *
+ * Writes <nodeCount>, then one <node> element per node (numbered from 1 to
+ * the dict's "count") holding nodeName, id, files, size, lookups, failures,
+ * skipped, status, statusStr and runtime, and finally an <aggregate>
+ * element with the summed counters, the combined status and the longest
+ * runtime.
+ *
+ * @writer:    XML writer positioned inside the caller's parent element
+ * @dict:      status dictionary; NULL is accepted and writes nothing
+ * @task_type: GF_TASK_TYPE_REBALANCE reads "skipped-<N>"; any other type
+ *             reuses "failures-<N>" for the skipped count
+ *
+ * Returns 0 on success, non-zero as soon as a lookup or a write fails.
+ */
+int
+cli_xml_output_vol_rebalance_status (xmlTextWriterPtr writer, dict_t *dict,
+                                     enum gf_task_types task_type)
+{
+        int             ret             = -1;
+        int             count           = 0;
+        int             i               = 0;
+        char            key[1024]       = {0,};
+        char           *node_name       = NULL;
+        char           *node_uuid       = NULL;
+        const char     *skipped_prefix  = NULL;
+        int             status_rcd      = 0;
+        int             overall_status  = -1;
+        double          elapsed         = 0;
+        double          overall_elapsed = 0;
+        uint64_t        total_files     = 0;
+        uint64_t        total_size      = 0;
+        uint64_t        total_lookups   = 0;
+        uint64_t        total_failures  = 0;
+        uint64_t        total_skipped   = 0;
+
+        if (!dict) {
+                ret = 0;
+                goto out;
+        }
+
+        /* skipped-%d is not available for remove brick in dict,
+           so using failures as skipped count in case of remove-brick
+           similar to logic used in CLI(non xml output) */
+        skipped_prefix = (task_type == GF_TASK_TYPE_REBALANCE) ? "skipped"
+                                                               : "failures";
+
+        ret = dict_get_int32 (dict, "count", &count);
+        if (ret)
+                goto out;
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nodeCount",
+                                               "%d", count);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        for (i = 1; i <= count; i++) {
+                /* <node> */
+                ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                snprintf (key, sizeof (key), "node-name-%d", i);
+                ret = dict_get_str (dict, key, &node_name);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"nodeName",
+                                                       "%s", node_name);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                snprintf (key, sizeof (key), "node-uuid-%d", i);
+                ret = dict_get_str (dict, key, &node_uuid);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"id",
+                                                       "%s", node_uuid);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                ret = cli_xml_output_rebalance_counter (writer, dict, "files",
+                                                        i, "files",
+                                                        &total_files);
+                if (ret)
+                        goto out;
+
+                ret = cli_xml_output_rebalance_counter (writer, dict, "size",
+                                                        i, "size",
+                                                        &total_size);
+                if (ret)
+                        goto out;
+
+                ret = cli_xml_output_rebalance_counter (writer, dict,
+                                                        "lookups", i,
+                                                        "lookups",
+                                                        &total_lookups);
+                if (ret)
+                        goto out;
+
+                ret = cli_xml_output_rebalance_counter (writer, dict,
+                                                        "failures", i,
+                                                        "failures",
+                                                        &total_failures);
+                if (ret)
+                        goto out;
+
+                ret = cli_xml_output_rebalance_counter (writer, dict,
+                                                        skipped_prefix, i,
+                                                        "skipped",
+                                                        &total_skipped);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "status-%d", i);
+                ret = dict_get_int32 (dict, key, &status_rcd);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"status",
+                                                       "%d", status_rcd);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                        (xmlChar *)"statusStr", "%s",
+                                        cli_xml_task_status_str (status_rcd));
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                snprintf (key, sizeof (key), "run-time-%d", i);
+                ret = dict_get_double (dict, key, &elapsed);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"runtime",
+                                                       "%.2f", elapsed);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                if (elapsed > overall_elapsed)
+                        overall_elapsed = elapsed;
+
+                /* The first node seeds the aggregate status; afterwards a
+                 * node that is not COMPLETE replaces it when the aggregate
+                 * is COMPLETE or the node's code is numerically higher. */
+                if (-1 == overall_status)
+                        overall_status = status_rcd;
+                else if ((GF_DEFRAG_STATUS_COMPLETE == overall_status ||
+                          status_rcd > overall_status) &&
+                         (status_rcd != GF_DEFRAG_STATUS_COMPLETE))
+                        overall_status = status_rcd;
+
+                /* </node> */
+                ret = xmlTextWriterEndElement (writer);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+        }
+
+        /* Aggregate status */
+        /* <aggregate> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)"aggregate");
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"files",
+                                               "%"PRIu64, total_files);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"size",
+                                               "%"PRIu64, total_size);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"lookups",
+                                               "%"PRIu64, total_lookups);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"failures",
+                                               "%"PRIu64, total_failures);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"skipped",
+                                               "%"PRIu64, total_skipped);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"status",
+                                               "%d", overall_status);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* overall_status is still -1 when count was 0 */
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"statusStr",
+                                "%s",
+                                cli_xml_task_status_str (overall_status));
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"runtime",
+                                               "%.2f", overall_elapsed);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* </aggregate> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+}
+#endif
+
+/*
+ * Produce the XML document for a volume rebalance command.
+ *
+ * Writes the common header (op_ret / op_errno / op_errstr), then a
+ * <volRebalance> element holding an optional <task-id> (only when @dict has
+ * GF_REBALANCE_TID_KEY), the numeric <op>, and, for the stop and status
+ * operations, the per-node details written by
+ * cli_xml_output_vol_rebalance_status().
+ *
+ * Returns the status of the step that failed, otherwise the result of
+ * cli_end_xml_output(). Builds without HAVE_LIB_XML always return 0.
+ */
+int
+cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRebalance> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRebalance");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* The task id is optional: a failed lookup is not treated as an error */
+ ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op",
+ "%d", op);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* Only stop and status include the per-node counters */
+ if ((GF_DEFRAG_CMD_STOP == op) || (GF_DEFRAG_CMD_STATUS == op)) {
+ ret = cli_xml_output_vol_rebalance_status (writer, dict,
+ GF_TASK_TYPE_REBALANCE);
+ if (ret)
+ goto out;
+ }
+
+ /* </volRebalance> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ ret = cli_end_xml_output (writer, doc);
out:
- if (size_str)
- GF_FREE (size_str);
gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
+#else
+ return 0;
+#endif
}
+/*
+ * Produce the XML document for a remove-brick command.
+ *
+ * Writes the common header (op_ret / op_errno / op_errstr), then a
+ * <volRemoveBrick> element holding an optional <task-id> (only when @dict
+ * has GF_REMOVE_BRICK_TID_KEY) and, when @status_op is set, the per-node
+ * details written by cli_xml_output_vol_rebalance_status().
+ *
+ * Returns the status of the step that failed, otherwise the result of
+ * cli_end_xml_output(). Builds without HAVE_LIB_XML always return 0.
+ */
+int
+cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output (&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRemoveBrick> */
+ ret = xmlTextWriterStartElement (writer, (xmlChar *)"volRemoveBrick");
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+ /* The task id is optional: a failed lookup is not treated as an error */
+ ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement (writer,
+ (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ /* Per-node counters are written only for the status operation */
+ if (status_op) {
+ ret = cli_xml_output_vol_rebalance_status (writer, dict,
+ GF_TASK_TYPE_REMOVE_BRICK);
+ if (ret)
+ goto out;
+ }
+
+ /* </volRemoveBrick> */
+ ret = xmlTextWriterEndElement (writer);
+ XML_RET_CHECK_AND_GOTO (ret, out);
+
+
+ ret = cli_end_xml_output (writer, doc);
+
+out:
+ gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
#endif
+}
+
+/*
+ * Produce the XML document for a replace-brick command.
+ *
+ * Writes the common header (op_ret / op_errno / op_errstr), then a
+ * <volReplaceBrick> element holding an optional <task-id> (only when @dict
+ * has GF_REPLACE_BRICK_TID_KEY) and the numeric <op>. For
+ * GF_REPLACE_OP_STATUS it also writes <status> and <files>, and
+ * <currentFile> while the status value is 0.
+ *
+ * Returns the status of the step that failed, otherwise the result of
+ * cli_end_xml_output(). Builds without HAVE_LIB_XML always return 0.
+ */
+int
+cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict,
+                                  int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+        xmlTextWriterPtr        writer    = NULL;
+        xmlDocPtr               doc       = NULL;
+        char                   *tid       = NULL;
+        char                   *cur_file  = NULL;
+        uint64_t                nfiles    = 0;
+        int                     op_status = 0;
+        int                     ret       = -1;
+
+        ret = cli_begin_xml_output (&writer, &doc);
+        if (ret)
+                goto out;
+
+        ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+        if (ret)
+                goto out;
+
+        /* <volReplaceBrick> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)"volReplaceBrick");
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* The task id is optional */
+        ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &tid);
+        if (ret == 0) {
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"task-id",
+                                                       "%s", tid);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+        }
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"op",
+                                               "%d", op);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        if (GF_REPLACE_OP_STATUS != op)
+                goto close_root;
+
+        ret = dict_get_int32 (dict, "status", &op_status);
+        if (ret)
+                goto out;
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"status",
+                                               "%d", op_status);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = dict_get_uint64 (dict, "files", &nfiles);
+        if (ret)
+                goto out;
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"files",
+                                               "%"PRIu64, nfiles);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* <currentFile> is reported only while status is 0 */
+        if (op_status == 0) {
+                ret = dict_get_str (dict, "current_file", &cur_file);
+                if (ret)
+                        goto out;
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                (xmlChar *)"currentFile",
+                                                "%s", cur_file);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+        }
+
+close_root:
+        /* </volReplaceBrick> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = cli_end_xml_output (writer, doc);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+#else
+        return 0;
+#endif
+}
+
+/*
+ * Produce the XML document for a volume create command.
+ *
+ * Writes the common header (op_ret / op_errno / op_errstr). When @dict is
+ * non-NULL it adds <volCreate><volume><name/><id/></volume></volCreate>,
+ * taken from the "volname" and "volume-id" keys; both keys are required in
+ * that case.
+ *
+ * Returns the status of the step that failed, otherwise the result of
+ * cli_end_xml_output(). Builds without HAVE_LIB_XML always return 0.
+ */
+int
+cli_xml_output_vol_create (dict_t *dict, int op_ret, int op_errno,
+                           char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+        xmlTextWriterPtr        writer   = NULL;
+        xmlDocPtr               doc      = NULL;
+        char                   *vol_name = NULL;
+        char                   *vol_uuid = NULL;
+        int                     ret      = -1;
+
+        ret = cli_begin_xml_output (&writer, &doc);
+        if (ret)
+                goto out;
+
+        ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+        if (ret)
+                goto out;
+
+        /* Without a dict only the common header is emitted */
+        if (!dict)
+                goto finish;
+
+        /* <volCreate> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)"volCreate");
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* <volume> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)"volume");
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = dict_get_str (dict, "volname", &vol_name);
+        if (ret)
+                goto out;
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *) "name",
+                                               "%s", vol_name);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = dict_get_str (dict, "volume-id", &vol_uuid);
+        if (ret)
+                goto out;
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"id",
+                                               "%s", vol_uuid);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* </volume> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* </volCreate> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+finish:
+        ret = cli_end_xml_output (writer, doc);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+#else
+        return 0;
+#endif
+}
+
+/*
+ * Produce an XML document for a volume operation whose root element name
+ * is supplied by the caller in @op.
+ *
+ * Writes the common header (op_ret / op_errno / op_errstr). When @dict is
+ * non-NULL it adds <op><volume><name/><id/></volume></op>, taken from the
+ * "volname" and "vol-id" keys; both keys are required in that case.
+ *
+ * Returns the status of the step that failed, otherwise the result of
+ * cli_end_xml_output(). Builds without HAVE_LIB_XML always return 0.
+ */
+int
+cli_xml_output_generic_volume (char *op, dict_t *dict, int op_ret, int op_errno,
+                               char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+        xmlTextWriterPtr        writer   = NULL;
+        xmlDocPtr               doc      = NULL;
+        char                   *vol_name = NULL;
+        char                   *vol_uuid = NULL;
+        int                     ret      = -1;
+
+        GF_ASSERT (op);
+
+        ret = cli_begin_xml_output (&writer, &doc);
+        if (ret)
+                goto out;
+
+        ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+        if (ret)
+                goto out;
+
+        /* Without a dict only the common header is emitted */
+        if (NULL == dict)
+                goto finish;
+
+        /* <"op"> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* <volume> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)"volume");
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = dict_get_str (dict, "volname", &vol_name);
+        if (ret)
+                goto out;
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *) "name",
+                                               "%s", vol_name);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = dict_get_str (dict, "vol-id", &vol_uuid);
+        if (ret)
+                goto out;
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"id",
+                                               "%s", vol_uuid);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* </volume> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        /* </"op"> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+finish:
+        ret = cli_end_xml_output (writer, doc);
+
+out:
+        gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+#else
+        return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+/*
+ * Write one <pair> element per geo-replication session found in @dict.
+ *
+ * Sessions are numbered from 1 up to the dict's "gsync-count". For each
+ * one the "node<N>", "master<N>", "slave<N>" and "status<N>" strings are
+ * looked up (all four are required) and written as <node>, <master>,
+ * <slave> and <status> children of <pair>.
+ *
+ * Returns 0 on success, non-zero as soon as a lookup or a write fails.
+ */
+int
+cli_xml_output_vol_gsync_status (dict_t *dict, xmlTextWriterPtr writer)
+{
+        char     key[PATH_MAX] = "";
+        char    *node_val      = NULL;
+        char    *master_val    = NULL;
+        char    *slave_val     = NULL;
+        char    *status_val    = NULL;
+        int      npairs        = 0;
+        int      idx           = 0;
+        int      ret           = -1;
+
+        ret = dict_get_int32 (dict, "gsync-count", &npairs);
+        if (ret)
+                goto out;
+
+        for (idx = 1; idx <= npairs; idx++) {
+                /* Fetch all four values before anything is written */
+                snprintf (key, sizeof (key), "node%d", idx);
+                ret = dict_get_str (dict, key, &node_val);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "master%d", idx);
+                ret = dict_get_str (dict, key, &master_val);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "slave%d", idx);
+                ret = dict_get_str (dict, key, &slave_val);
+                if (ret)
+                        goto out;
+
+                snprintf (key, sizeof (key), "status%d", idx);
+                ret = dict_get_str (dict, key, &status_val);
+                if (ret)
+                        goto out;
+
+                /* <pair> */
+                ret = xmlTextWriterStartElement (writer, (xmlChar *)"pair");
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"node",
+                                                       "%s", node_val);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"master",
+                                                       "%s", master_val);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"slave",
+                                                       "%s", slave_val);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"status",
+                                                       "%s", status_val);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                /* </pair> */
+                ret = xmlTextWriterEndElement (writer);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+        }
+
+out:
+        gf_log ("cli",GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+}
+#endif
+
+/*
+ * Produce the XML document for a geo-replication command.
+ *
+ * Writes the common header (op_ret / op_errno / op_errstr), then a <geoRep>
+ * element holding the numeric <type> read from the dict's "type" key,
+ * followed by type-specific content:
+ *   - start / stop: <master> and <slave>, "???" when the key is missing;
+ *   - status:       the <pair> list from cli_xml_output_vol_gsync_status();
+ *   - anything else (including config): nothing further.
+ *
+ * @dict must not be NULL.
+ *
+ * Returns the status of the step that failed, otherwise the result of
+ * cli_end_xml_output(). Builds without HAVE_LIB_XML always return 0.
+ */
+int
+cli_xml_output_vol_gsync (dict_t *dict, int op_ret, int op_errno,
+                          char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+        int                     ret = -1;
+        xmlTextWriterPtr        writer = NULL;
+        xmlDocPtr               doc = NULL;
+        char                    *master = NULL;
+        char                    *slave = NULL;
+        int                     type = 0;
+
+        GF_ASSERT (dict);
+
+        ret = cli_begin_xml_output (&writer, &doc);
+        if (ret)
+                goto out;
+
+        ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+        if (ret)
+                goto out;
+
+        /* <geoRep> */
+        ret = xmlTextWriterStartElement (writer, (xmlChar *)"geoRep");
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = dict_get_int32 (dict, "type", &type);
+        if (ret) {
+                gf_log ("cli", GF_LOG_ERROR, "Failed to get type");
+                goto out;
+        }
+
+        ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"type",
+                                               "%d", type);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        switch (type) {
+        case GF_GSYNC_OPTION_TYPE_START:
+        case GF_GSYNC_OPTION_TYPE_STOP:
+                if (dict_get_str (dict, "master", &master) != 0)
+                        master = "???";
+                if (dict_get_str (dict, "slave", &slave) != 0)
+                        slave = "???";
+
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"master",
+                                                       "%s", master);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                ret = xmlTextWriterWriteFormatElement (writer,
+                                                       (xmlChar *)"slave",
+                                                       "%s", slave);
+                XML_RET_CHECK_AND_GOTO (ret, out);
+
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_STATUS:
+                /* Stop here on failure: the element-closing call below
+                 * would otherwise overwrite ret and hide the error. */
+                ret = cli_xml_output_vol_gsync_status (dict, writer);
+                if (ret)
+                        goto out;
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_CONFIG:
+        default:
+                /* nothing type-specific to add */
+                break;
+        }
+
+        /* </geoRep> */
+        ret = xmlTextWriterEndElement (writer);
+        XML_RET_CHECK_AND_GOTO (ret, out);
+
+        ret = cli_end_xml_output (writer, doc);
+out:
+        gf_log ("cli",GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+#else
+        return 0;
+#endif
+}
diff --git a/cli/src/cli.c b/cli/src/cli.c
index 89b5a8bd4..91b315ff1 100644
--- a/cli/src/cli.c
+++ b/cli/src/cli.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -99,43 +89,6 @@ rpc_clnt_prog_t *cli_rpc_prog;
extern struct rpc_clnt_program cli_prog;
-
-
-
-static char *
-generate_uuid ()
-{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (gethostname (hostname, 256) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%"
-#ifdef GF_DARWIN_HOST_OS
- PRId32,
-#else
- "ld",
-#endif
- hostname, getpid(), now_str, tv.tv_usec);
-
- return gf_strdup (tmp_str);
-}
-
static int
glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
{
@@ -145,7 +98,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
xlator_mem_acct_init (THIS, cli_mt_end);
- ctx->process_uuid = generate_uuid ();
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
if (!ctx->process_uuid)
return -1;
@@ -210,12 +163,13 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
static int
-logging_init (struct cli_state *state)
+logging_init (glusterfs_ctx_t *ctx, struct cli_state *state)
{
char *log_file = state->log_file ? state->log_file :
DEFAULT_CLI_LOG_FILE_DIRECTORY "/cli.log";
- if (gf_log_init (log_file) == -1) {
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init (ctx, log_file, NULL) == -1) {
fprintf (stderr, "ERROR: failed to open logfile %s\n",
log_file);
return -1;
@@ -330,18 +284,41 @@ cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
return ret;
}
+
+/*
+ * ret: 0: option successfully processed
+ * 1: signalling end of option list
+ * -1: unknown option or other issue
+ */
int
cli_opt_parse (char *opt, struct cli_state *state)
{
char *oarg;
+ if (strcmp (opt, "") == 0)
+ return 1;
+
if (strcmp (opt, "version") == 0) {
- puts (argp_program_version);
+ cli_out ("%s", argp_program_version);
+ exit (0);
+ }
+
+ if (strcmp (opt, "print-logdir") == 0) {
+ cli_out ("%s", DEFAULT_LOG_FILE_DIRECTORY);
+ exit (0);
+ }
+
+ if (strcmp (opt, "print-statedumpdir") == 0) {
+ cli_out ("%s", DEFAULT_VAR_RUN_DIRECTORY);
exit (0);
}
if (strcmp (opt, "xml") == 0) {
+#if (HAVE_LIB_XML)
state->mode |= GLUSTER_MODE_XML;
+#else
+ cli_err ("XML output not supported. Ignoring '--xml' option");
+#endif
return 0;
}
@@ -376,6 +353,12 @@ cli_opt_parse (char *opt, struct cli_state *state)
return 0;
}
+ oarg = strtail (opt, "glusterd-sock=");
+ if (oarg) {
+ state->glusterd_sock = oarg;
+ return 0;
+ }
+
return -1;
}
@@ -403,9 +386,16 @@ parse_cmdline (int argc, char *argv[], struct cli_state *state)
state->argc--;
/* argv shifted, next check should be at i again */
i--;
+ if (ret == 1) {
+ /* end of cli options */
+ ret = 0;
+ break;
+ }
}
}
+ state->argv[state->argc] = NULL;
+
return ret;
}
@@ -430,7 +420,6 @@ cli_state_init (struct cli_state *state)
int ret = 0;
- state->remote_host = "localhost";
state->log_level = -1;
tree = &state->tree;
@@ -517,23 +506,46 @@ cli_rpc_init (struct cli_state *state)
if (!options)
goto out;
- ret = dict_set_str (options, "remote-host", state->remote_host);
- if (ret)
- goto out;
+ /* Connect to glusterd using the specified method, giving
+ * preference to unix socket connection. If nothing is specified, connect
+ * to the default glusterd socket.
+ */
+ if (state->glusterd_sock) {
+ gf_log ("cli", GF_LOG_INFO, "Connecting to glusterd using "
+ "sockfile %s", state->glusterd_sock);
+ ret = rpc_transport_unix_options_build (&options,
+ state->glusterd_sock,
+ 0);
+ if (ret)
+ goto out;
+ } else if (state->remote_host) {
+ gf_log ("cli", GF_LOG_INFO, "Connecting to remote glusterd at "
+ "%s", state->remote_host);
+ ret = dict_set_str (options, "remote-host", state->remote_host);
+ if (ret)
+ goto out;
- if (state->remote_port)
- port = state->remote_port;
+ if (state->remote_port)
+ port = state->remote_port;
- ret = dict_set_int32 (options, "remote-port", port);
- if (ret)
- goto out;
+ ret = dict_set_int32 (options, "remote-port", port);
+ if (ret)
+ goto out;
- ret = dict_set_str (options, "transport.address-family", "inet/inet6");
- if (ret)
- goto out;
+ ret = dict_set_str (options, "transport.address-family",
+ "inet");
+ if (ret)
+ goto out;
+ } else {
+ gf_log ("cli", GF_LOG_DEBUG, "Connecting to glusterd using "
+ "default socket");
+ ret = rpc_transport_unix_options_build
+ (&options, DEFAULT_GLUSTERD_SOCKFILE, 0);
+ if (ret)
+ goto out;
+ }
rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
-
if (!rpc)
goto out;
@@ -543,7 +555,7 @@ cli_rpc_init (struct cli_state *state)
goto out;
}
- rpc_clnt_start (rpc);
+ ret = rpc_clnt_start (rpc);
out:
if (ret) {
if (rpc)
@@ -567,8 +579,7 @@ void
cli_local_wipe (cli_local_t *local)
{
if (local) {
- if (local->get_vol.volname)
- GF_FREE (local->get_vol.volname);
+ GF_FREE (local->get_vol.volname);
if (local->dict)
dict_unref (local->dict);
GF_FREE (local);
@@ -586,13 +597,19 @@ main (int argc, char *argv[])
int ret = -1;
glusterfs_ctx_t *ctx = NULL;
- ret = glusterfs_globals_init ();
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return ENOMEM;
+
+#ifdef DEBUG
+ gf_mem_acct_enable_set (ctx);
+#endif
+
+ ret = glusterfs_globals_init (ctx);
if (ret)
return ret;
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- return ENOMEM;
+ THIS->ctx = ctx;
ret = glusterfs_ctx_defaults_init (ctx);
if (ret)
@@ -609,7 +626,7 @@ main (int argc, char *argv[])
if (ret)
goto out;
- ret = logging_init (&state);
+ ret = logging_init (ctx, &state);
if (ret)
goto out;
diff --git a/cli/src/cli.h b/cli/src/cli.h
index 76b92e91d..8daa4b741 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_H__
#define __CLI_H__
@@ -28,6 +18,9 @@
#include "rpc-clnt.h"
#include "glusterfs.h"
#include "protocol-common.h"
+#include "logging.h"
+
+#include "cli1-xdr.h"
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
@@ -38,9 +31,8 @@
#define CLI_GLUSTERD_PORT 24007
#define CLI_DEFAULT_CONN_TIMEOUT 120
#define CLI_DEFAULT_CMD_TIMEOUT 120
-#define CLI_TOP_CMD_TIMEOUT 600 //Longer timeout for volume top
+#define CLI_TEN_MINUTES_TIMEOUT 600 //Longer timeout for volume top
#define DEFAULT_CLI_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
#define CLI_VOL_STATUS_BRICK_LEN 55
#define CLI_TAB_LENGTH 8
#define CLI_BRICK_STATUS_LINE_LEN 78
@@ -58,6 +50,10 @@ struct cli_cmd_word;
struct cli_cmd_tree;
struct cli_cmd;
+extern char *cli_vol_type_str[];
+extern char *cli_vol_status_str[];
+extern char *cli_vol_task_status_str[];
+
typedef int (cli_cmd_cbk_t)(struct cli_state *state,
struct cli_cmd_word *word,
const char **words,
@@ -119,6 +115,8 @@ struct cli_state {
char *log_file;
gf_loglevel_t log_level;
+
+ char *glusterd_sock;
};
struct cli_local {
@@ -128,15 +126,28 @@ struct cli_local {
} get_vol;
dict_t *dict;
+ const char **words;
/* Marker for volume status all */
gf_boolean_t all;
#if (HAVE_LIB_XML)
xmlTextWriterPtr writer;
- xmlBufferPtr buf;
+ xmlDocPtr doc;
int vol_count;
#endif
};
+/*
+ * NOTE(review): presumably one row of detailed geo-replication status, with
+ * every value kept as a C string. Confirm the meaning and the ownership of
+ * each pointer against the code that fills this structure in.
+ */
+struct gf_cli_gsync_detailed_status_ {
+ char *node;
+ char *master;
+ char *slave;
+ char *health;
+ char *uptime;
+ char *files_syncd;
+ char *files_pending;
+ char *bytes_pending;
+ char *deletes_pending;
+};
+
struct cli_volume_status {
int port;
int online;
@@ -155,6 +166,13 @@ struct cli_volume_status {
#endif
};
+/*
+ * NOTE(review): looks like a snapshot config option name paired with the
+ * confirmation question shown for it. Verify against the snapshot command
+ * code that uses this structure.
+ */
+struct snap_config_opt_vals_ {
+ char *op_name;
+ char *question;
+};
+
+typedef struct gf_cli_gsync_detailed_status_ gf_cli_gsync_status_t;
+
typedef struct cli_volume_status cli_volume_status_t;
typedef struct cli_local cli_local_t;
@@ -219,7 +237,7 @@ cli_cmd_quota_parse (const char **words, int wordcount, dict_t **opt);
int32_t
cli_cmd_volume_set_parse (const char **words, int wordcount,
- dict_t **options);
+ dict_t **options, char **op_errstr);
int32_t
cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
@@ -282,6 +300,10 @@ cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
dict_t **options);
int
+cli_cmd_volume_defrag_parse (const char **words, int wordcount,
+ dict_t **options);
+
+int
cli_print_brick_status (cli_volume_status_t *status);
void
@@ -293,7 +315,6 @@ cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status);
void
cli_print_line (int len);
-#if (HAVE_LIB_XML)
int
cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
char *op_errstr);
@@ -311,8 +332,14 @@ cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
int
-cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_output_vol_status_begin (cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_end (cli_local_t *local);
+
+int
+cli_xml_output_vol_status (cli_local_t *local, dict_t *dict);
int
cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
@@ -332,6 +359,42 @@ int
cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
int op_ret, int op_errno,
char *op_errstr);
-#endif
+
+int
+cli_xml_output_peer_status (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_rebalance (gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_remove_brick (gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_replace_brick (gf1_cli_replace_op op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_create (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_generic_volume (char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_gsync (dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_output_vol_status_tasks_detail (cli_local_t *local, dict_t *dict);
+
+char *
+is_server_debug_xlator (void *myframe);
+
+int32_t
+cli_cmd_snapshot_parse (const char **words, int wordcount, dict_t **options,
+ struct cli_state *state);
#endif /* __CLI_H__ */
diff --git a/cli/src/input.c b/cli/src/input.c
index a88d35874..a8ea46c6d 100644
--- a/cli/src/input.c
+++ b/cli/src/input.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -44,7 +34,7 @@ cli_batch (void *d)
ret = cli_cmd_process (state, state->argc, state->argv);
gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
- exit (ret);
+ exit (-ret);
return NULL;
}
@@ -71,11 +61,11 @@ cli_input (void *d)
if (len > 0 && cmd[len - 1] == '\n') //strip trailing \n
cmd[len - 1] = '\0';
ret = cli_cmd_process_line (state, cmd);
- if (ret == -1 && state->mode & GLUSTER_MODE_ERR_FATAL)
+ if (ret != 0 && state->mode & GLUSTER_MODE_ERR_FATAL)
break;
}
- exit (ret);
+ exit (-ret);
return NULL;
}
diff --git a/cli/src/registry.c b/cli/src/registry.c
index fc3b2decd..c4abe3be0 100644
--- a/cli/src/registry.c
+++ b/cli/src/registry.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
diff --git a/configure.ac b/configure.ac
index f8f270b9f..b3d1ed184 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,19 +1,10 @@
-dnl Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-
-dnl This file is part of GlusterFS.
-dnl
-dnl GlusterFS is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 3 of the License, or
-dnl (at your option) any later version.
-dnl
-dnl GlusterFS is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-dnl GNU General Public License for more details.
+dnl Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+dnl This file is part of GlusterFS.
dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
+dnl This file is licensed to you under your choice of the GNU Lesser
+dnl General Public License, version 3 or any later version (LGPLv3 or
+dnl later), or the GNU General Public License, version 2 (GPLv2), in all
+dnl cases as published by the Free Software Foundation.
AC_INIT([glusterfs],[3git],[gluster-users@gluster.org],,[https://github.com/gluster/glusterfs.git])
@@ -33,13 +24,15 @@ if libtool --help 2>&1 | grep -q quiet; then
AM_LIBTOOLFLAGS="--quiet";
fi
-AM_CONFIG_HEADER([config.h])
+AC_CONFIG_HEADERS([config.h])
AC_CONFIG_FILES([Makefile
- libglusterfs/Makefile
- libglusterfs/src/Makefile
- glusterfsd/Makefile
- glusterfsd/src/Makefile
+ libglusterfs/Makefile
+ libglusterfs/src/Makefile
+ geo-replication/src/peer_gsec_create
+ geo-replication/src/peer_add_secret_pub
+ glusterfsd/Makefile
+ glusterfsd/src/Makefile
rpc/Makefile
rpc/rpc-lib/Makefile
rpc/rpc-lib/src/Makefile
@@ -50,111 +43,162 @@ AC_CONFIG_FILES([Makefile
rpc/rpc-transport/rdma/src/Makefile
rpc/xdr/Makefile
rpc/xdr/src/Makefile
- xlators/Makefile
- xlators/mount/Makefile
- xlators/mount/fuse/Makefile
- xlators/mount/fuse/src/Makefile
- xlators/mount/fuse/utils/mount.glusterfs
- xlators/mount/fuse/utils/mount_glusterfs
- xlators/mount/fuse/utils/Makefile
- xlators/storage/Makefile
- xlators/storage/posix/Makefile
- xlators/storage/posix/src/Makefile
- xlators/cluster/Makefile
- xlators/cluster/afr/Makefile
- xlators/cluster/afr/src/Makefile
- xlators/cluster/stripe/Makefile
- xlators/cluster/stripe/src/Makefile
- xlators/cluster/dht/Makefile
- xlators/cluster/dht/src/Makefile
- xlators/performance/Makefile
- xlators/performance/write-behind/Makefile
- xlators/performance/write-behind/src/Makefile
- xlators/performance/read-ahead/Makefile
- xlators/performance/read-ahead/src/Makefile
- xlators/performance/io-threads/Makefile
- xlators/performance/io-threads/src/Makefile
- xlators/performance/io-cache/Makefile
- xlators/performance/io-cache/src/Makefile
- xlators/performance/symlink-cache/Makefile
- xlators/performance/symlink-cache/src/Makefile
- xlators/performance/quick-read/Makefile
- xlators/performance/quick-read/src/Makefile
+ xlators/Makefile
+ xlators/mount/Makefile
+ xlators/mount/fuse/Makefile
+ xlators/mount/fuse/src/Makefile
+ xlators/mount/fuse/utils/mount.glusterfs
+ xlators/mount/fuse/utils/mount_glusterfs
+ xlators/mount/fuse/utils/Makefile
+ xlators/storage/Makefile
+ xlators/storage/posix/Makefile
+ xlators/storage/posix/src/Makefile
+ xlators/storage/bd/Makefile
+ xlators/storage/bd/src/Makefile
+ xlators/cluster/Makefile
+ xlators/cluster/afr/Makefile
+ xlators/cluster/afr/src/Makefile
+ xlators/cluster/stripe/Makefile
+ xlators/cluster/stripe/src/Makefile
+ xlators/cluster/dht/Makefile
+ xlators/cluster/dht/src/Makefile
+ xlators/performance/Makefile
+ xlators/performance/write-behind/Makefile
+ xlators/performance/write-behind/src/Makefile
+ xlators/performance/read-ahead/Makefile
+ xlators/performance/read-ahead/src/Makefile
+ xlators/performance/readdir-ahead/Makefile
+ xlators/performance/readdir-ahead/src/Makefile
+ xlators/performance/io-threads/Makefile
+ xlators/performance/io-threads/src/Makefile
+ xlators/performance/io-cache/Makefile
+ xlators/performance/io-cache/src/Makefile
+ xlators/performance/symlink-cache/Makefile
+ xlators/performance/symlink-cache/src/Makefile
+ xlators/performance/quick-read/Makefile
+ xlators/performance/quick-read/src/Makefile
+ xlators/performance/open-behind/Makefile
+ xlators/performance/open-behind/src/Makefile
xlators/performance/md-cache/Makefile
xlators/performance/md-cache/src/Makefile
- xlators/debug/Makefile
- xlators/debug/trace/Makefile
- xlators/debug/trace/src/Makefile
- xlators/debug/error-gen/Makefile
- xlators/debug/error-gen/src/Makefile
- xlators/debug/io-stats/Makefile
- xlators/debug/io-stats/src/Makefile
- xlators/protocol/Makefile
- xlators/protocol/auth/Makefile
- xlators/protocol/auth/addr/Makefile
- xlators/protocol/auth/addr/src/Makefile
- xlators/protocol/auth/login/Makefile
- xlators/protocol/auth/login/src/Makefile
- xlators/protocol/client/Makefile
- xlators/protocol/client/src/Makefile
- xlators/protocol/server/Makefile
- xlators/protocol/server/src/Makefile
- xlators/features/Makefile
- xlators/features/locks/Makefile
- xlators/features/locks/src/Makefile
- xlators/features/quota/Makefile
- xlators/features/quota/src/Makefile
+ xlators/debug/Makefile
+ xlators/debug/trace/Makefile
+ xlators/debug/trace/src/Makefile
+ xlators/debug/error-gen/Makefile
+ xlators/debug/error-gen/src/Makefile
+ xlators/debug/io-stats/Makefile
+ xlators/debug/io-stats/src/Makefile
+ xlators/protocol/Makefile
+ xlators/protocol/auth/Makefile
+ xlators/protocol/auth/addr/Makefile
+ xlators/protocol/auth/addr/src/Makefile
+ xlators/protocol/auth/login/Makefile
+ xlators/protocol/auth/login/src/Makefile
+ xlators/protocol/client/Makefile
+ xlators/protocol/client/src/Makefile
+ xlators/protocol/server/Makefile
+ xlators/protocol/server/src/Makefile
+ xlators/features/Makefile
+ xlators/features/changelog/Makefile
+ xlators/features/changelog/src/Makefile
+ xlators/features/changelog/lib/Makefile
+ xlators/features/changelog/lib/src/Makefile
+ xlators/features/glupy/Makefile
+ xlators/features/glupy/src/Makefile
+ xlators/features/locks/Makefile
+ xlators/features/locks/src/Makefile
+ xlators/features/quota/Makefile
+ xlators/features/quota/src/Makefile
xlators/features/marker/Makefile
xlators/features/marker/src/Makefile
- xlators/features/marker/utils/Makefile
- xlators/features/marker/utils/src/Makefile
- xlators/features/marker/utils/syncdaemon/Makefile
- xlators/features/read-only/Makefile
- xlators/features/read-only/src/Makefile
- xlators/features/mac-compat/Makefile
- xlators/features/mac-compat/src/Makefile
- xlators/features/quiesce/Makefile
- xlators/features/quiesce/src/Makefile
+ xlators/features/read-only/Makefile
+ xlators/features/read-only/src/Makefile
+ xlators/features/compress/Makefile
+ xlators/features/compress/src/Makefile
+ xlators/features/mac-compat/Makefile
+ xlators/features/mac-compat/src/Makefile
+ xlators/features/quiesce/Makefile
+ xlators/features/quiesce/src/Makefile
xlators/features/index/Makefile
xlators/features/index/src/Makefile
- xlators/encryption/Makefile
- xlators/encryption/rot-13/Makefile
- xlators/encryption/rot-13/src/Makefile
+ xlators/features/protect/Makefile
+ xlators/features/protect/src/Makefile
+ xlators/features/gfid-access/Makefile
+ xlators/features/gfid-access/src/Makefile
+ xlators/playground/Makefile
+ xlators/playground/template/Makefile
+ xlators/playground/template/src/Makefile
+ xlators/encryption/Makefile
+ xlators/encryption/rot-13/Makefile
+ xlators/encryption/rot-13/src/Makefile
+ xlators/encryption/crypt/Makefile
+ xlators/encryption/crypt/src/Makefile
+ xlators/features/qemu-block/Makefile
+ xlators/features/qemu-block/src/Makefile
xlators/system/Makefile
xlators/system/posix-acl/Makefile
xlators/system/posix-acl/src/Makefile
- cli/Makefile
- cli/src/Makefile
- doc/Makefile
- extras/Makefile
- extras/init.d/Makefile
- extras/init.d/glusterd.plist
- extras/init.d/glusterd-Debian
- extras/init.d/glusterd-Redhat
- extras/init.d/glusterd-SuSE
- extras/benchmarking/Makefile
- extras/hook-scripts/Makefile
- contrib/fuse-util/Makefile
- contrib/uuid/uuid_types.h
xlators/nfs/Makefile
xlators/nfs/server/Makefile
xlators/nfs/server/src/Makefile
xlators/mgmt/Makefile
xlators/mgmt/glusterd/Makefile
xlators/mgmt/glusterd/src/Makefile
- glusterfs.spec])
+ cli/Makefile
+ cli/src/Makefile
+ doc/Makefile
+ extras/Makefile
+ extras/init.d/Makefile
+ extras/init.d/glusterd.plist
+ extras/init.d/glusterd-Debian
+ extras/init.d/glusterd-Redhat
+ extras/init.d/glusterd-SuSE
+ extras/systemd/Makefile
+ extras/systemd/glusterd.service
+ extras/benchmarking/Makefile
+ extras/hook-scripts/Makefile
+ extras/ocf/Makefile
+ extras/ocf/glusterd
+ extras/ocf/volume
+ extras/LinuxRPM/Makefile
+ extras/geo-rep/Makefile
+ contrib/fuse-util/Makefile
+ contrib/uuid/uuid_types.h
+ glusterfs-api.pc
+ libgfchangelog.pc
+ api/Makefile
+ api/src/Makefile
+ api/examples/Makefile
+ api/examples/setup.py
+ geo-replication/Makefile
+ geo-replication/src/Makefile
+ geo-replication/syncdaemon/Makefile
+ glusterfs.spec])
AC_CANONICAL_HOST
AC_PROG_CC
+AC_DISABLE_STATIC
AC_PROG_LIBTOOL
+AC_ARG_WITH(pkgconfigdir,
+ [ --with-pkgconfigdir=DIR pkgconfig file in DIR @<:@LIBDIR/pkgconfig@:>@],
+ [pkgconfigdir=$withval],
+ [pkgconfigdir='${libdir}/pkgconfig'])
+AC_SUBST(pkgconfigdir)
+
AC_ARG_WITH(mountutildir,
[ --with-mountutildir=DIR mount helper utility in DIR @<:@/sbin@:>@],
[mountutildir=$withval],
[mountutildir='/sbin'])
AC_SUBST(mountutildir)
+AC_ARG_WITH(systemddir,
+ [ --with-systemddir=DIR systemd service files in DIR @<:@/usr/lib/systemd/system@:>@],
+ [systemddir=$withval],
+ [systemddir='/usr/lib/systemd/system'])
+AC_SUBST(systemddir)
+
AC_ARG_WITH(initdir,
[ --with-initdir=DIR init.d scripts in DIR @<:@/etc/init.d@:>@],
[initdir=$withval],
@@ -167,6 +211,13 @@ AC_ARG_WITH(launchddir,
[launchddir='/Library/LaunchDaemons'])
AC_SUBST(launchddir)
+# OCF resource agents are selected by default. OCF_SUBDIR is left empty only
+# for --without-ocf / --with-ocf=no; an explicit --with-ocf now selects them
+# too (previously the empty action-if-given left OCF_SUBDIR unset).
+AC_ARG_WITH([ocf],
+ [AS_HELP_STRING([--without-ocf], [do not build OCF-compliant cluster resource agents])],
+ [if test "x$withval" != "xno"; then OCF_SUBDIR='ocf'; fi],
+ [OCF_SUBDIR='ocf'])
+AC_SUBST(OCF_SUBDIR)
+
# LEX needs a check
AC_PROG_LEX
if test "x${LEX}" != "xflex" -a "x${FLEX}" != "xlex"; then
@@ -210,6 +261,8 @@ AC_CHECK_HEADERS([sys/extattr.h])
AC_CHECK_HEADERS([openssl/md5.h])
+AC_CHECK_HEADERS([linux/falloc.h])
+
case $host_os in
darwin*)
if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 5; then
@@ -243,8 +296,8 @@ fi
# FUSE section
AC_ARG_ENABLE([fuse-client],
- AC_HELP_STRING([--disable-fuse-client],
- [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
+ AC_HELP_STRING([--disable-fuse-client],
+ [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
BUILD_FUSE_CLIENT=no
if test "x$enable_fuse_client" != "xno"; then
@@ -252,60 +305,153 @@ if test "x$enable_fuse_client" != "xno"; then
BUILD_FUSE_CLIENT="yes"
fi
+AC_ARG_ENABLE([bd-xlator],
+ AC_HELP_STRING([--enable-bd-xlator], [Build BD xlator]))
+
+if test "x$enable_bd_xlator" != "xno"; then
+ AC_CHECK_LIB([lvm2app],
+ [lvm_init,lvm_lv_from_name],
+ [HAVE_BD_LIB="yes"],
+ [HAVE_BD_LIB="no"])
+
+if test "x$HAVE_BD_LIB" = "xyes"; then
+ # lvm_lv_from_name() has been made public with lvm2-2.02.79
+ AC_CHECK_DECLS(
+ [lvm_lv_from_name],
+ [NEED_LVM_LV_FROM_NAME_DECL="no"],
+ [NEED_LVM_LV_FROM_NAME_DECL="yes"],
+ [[#include <lvm2app.h>]])
+ fi
+fi
+
+if test "x$enable_bd_xlator" = "xyes" -a "x$HAVE_BD_LIB" = "xno"; then
+ echo "BD xlator requested but required lvm2 development library not found."
+ exit 1
+fi
+
+BUILD_BD_XLATOR=no
+# The option variable is enable_bd_xlator. The former spelling with dashes
+# was a shell default-value expansion of $enable, not a read of the option.
+if test "x${enable_bd_xlator}" != "xno" -a "x${HAVE_BD_LIB}" = "xyes"; then
+ BUILD_BD_XLATOR=yes
+ AC_DEFINE(HAVE_BD_XLATOR, 1, [define if lvm2app library found and bd xlator
+ enabled])
+ if test "x$NEED_LVM_LV_FROM_NAME_DECL" = "xyes"; then
+ AC_DEFINE(NEED_LVM_LV_FROM_NAME_DECL, 1, [defined if lvm_lv_from_name()
+ was not found in the lvm2app.h header, but can be linked])
+ fi
+fi
+
+AM_CONDITIONAL([ENABLE_BD_XLATOR], [test x$BUILD_BD_XLATOR = xyes])
+
+# start encryption/crypt section
+
+AC_CHECK_HEADERS([openssl/cmac.h], [have_cmac_h=yes], [have_cmac_h=no])
+
+AC_ARG_ENABLE([crypt-xlator],
+ AC_HELP_STRING([--enable-crypt-xlator], [Build crypt encryption xlator]))
+
+if test "x$enable_crypt_xlator" = "xyes" -a "x$have_cmac_h" = "xno"; then
+ echo "Encryption xlator requires OpenSSL with cmac.h"
+ exit 1
+fi
+
+BUILD_CRYPT_XLATOR=no
+if test "x$enable_crypt_xlator" != "xno" -a "x$have_cmac_h" = "xyes"; then
+ BUILD_CRYPT_XLATOR=yes
+ AC_DEFINE(HAVE_CRYPT_XLATOR, 1, [enable building crypt encryption xlator])
+fi
+
+AM_CONDITIONAL([ENABLE_CRYPT_XLATOR], [test x$BUILD_CRYPT_XLATOR = xyes])
+
AC_SUBST(FUSE_CLIENT_SUBDIR)
# end FUSE section
# FUSERMOUNT section
AC_ARG_ENABLE([fusermount],
- AC_HELP_STRING([--enable-fusermount],
- [Build fusermount]))
+ AC_HELP_STRING([--disable-fusermount],
+ [Use system's fusermount]))
-BUILD_FUSERMOUNT="no"
-if test "x$enable_fusermount" = "xyes"; then
- FUSERMOUNT_SUBDIR="contrib/fuse-util"
- BUILD_FUSERMOUNT="yes"
+BUILD_FUSERMOUNT="yes"
+if test "x$enable_fusermount" = "xno"; then
+ BUILD_FUSERMOUNT="no"
+else
AC_DEFINE(GF_FUSERMOUNT, 1, [Use our own fusermount])
+ FUSERMOUNT_SUBDIR="contrib/fuse-util"
fi
AC_SUBST(FUSERMOUNT_SUBDIR)
#end FUSERMOUNT section
+# QEMU_BLOCK section
+
+AC_ARG_ENABLE([qemu-block],
+ AC_HELP_STRING([--enable-qemu-block],
+ [Build QEMU Block formats translator]))
+
+if test "x$enable_qemu_block" != "xno"; then
+ PKG_CHECK_MODULES([GLIB], [glib-2.0],
+ [HAVE_GLIB_2="yes"],
+ [HAVE_GLIB_2="no"])
+fi
+
+if test "x$enable_qemu_block" = "xyes" -a "x$HAVE_GLIB_2" = "xno"; then
+ echo "QEMU Block formats translator requires libglib-2.0, but missing."
+ exit 1
+fi
+
+BUILD_QEMU_BLOCK=no
+if test "x${enable_qemu_block}" != "xno" -a "x${HAVE_GLIB_2}" = "xyes"; then
+ BUILD_QEMU_BLOCK=yes
+ AC_DEFINE(HAVE_QEMU_BLOCK, 1, [define if libglib-2.0 library found and QEMU
+ Block translator enabled])
+fi
+
+AM_CONDITIONAL([ENABLE_QEMU_BLOCK], [test x$BUILD_QEMU_BLOCK = xyes])
+
+# end QEMU_BLOCK section
# EPOLL section
AC_ARG_ENABLE([epoll],
- AC_HELP_STRING([--disable-epoll],
- [Use poll instead of epoll.]))
+ AC_HELP_STRING([--disable-epoll],
+ [Use poll instead of epoll.]))
BUILD_EPOLL=no
if test "x$enable_epoll" != "xno"; then
AC_CHECK_HEADERS([sys/epoll.h],
[BUILD_EPOLL=yes],
- [BUILD_EPOLL=no])
+ [BUILD_EPOLL=no])
fi
# end EPOLL section
# IBVERBS section
AC_ARG_ENABLE([ibverbs],
- AC_HELP_STRING([--disable-ibverbs],
- [Do not build the ibverbs transport]))
+ AC_HELP_STRING([--disable-ibverbs],
+ [Do not build the ibverbs transport]))
if test "x$enable_ibverbs" != "xno"; then
AC_CHECK_LIB([ibverbs],
[ibv_get_device_list],
- [HAVE_LIBIBVERBS="yes"],
- [HAVE_LIBIBVERBS="no"])
+ [HAVE_LIBIBVERBS="yes"],
+ [HAVE_LIBIBVERBS="no"])
+ AC_CHECK_LIB([rdmacm], [rdma_create_id], [HAVE_RDMACM="yes"], [HAVE_RDMACM="no"])
fi
-if test "x$enable_ibverbs" = "xyes" -a "x$HAVE_LIBIBVERBS" = "xno"; then
- echo "ibverbs requested but not found."
- exit 1
+if test "x$enable_ibverbs" = "xyes"; then
+ if test "x$HAVE_LIBIBVERBS" = "xno"; then
+ echo "ibverbs-transport requested, but libibverbs is not present."
+ exit 1
+ fi
+
+ if test "x$HAVE_RDMACM" = "xno"; then
+ echo "ibverbs-transport requested, but librdmacm is not present."
+ exit 1
+ fi
fi
BUILD_RDMA=no
BUILD_IBVERBS=no
-if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes"; then
+if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes" -a "x$HAVE_RDMACM" = "xyes"; then
IBVERBS_SUBDIR=ib-verbs
BUILD_IBVERBS=yes
RDMA_SUBDIR=rdma
@@ -319,8 +465,8 @@ AC_SUBST(RDMA_SUBDIR)
# SYNCDAEMON section
AC_ARG_ENABLE([georeplication],
- AC_HELP_STRING([--disable-georeplication],
- [Do not install georeplication components]))
+ AC_HELP_STRING([--disable-georeplication],
+ [Do not install georeplication components]))
BUILD_SYNCDAEMON=no
case $host_os in
@@ -332,12 +478,12 @@ case $host_os in
;;
*)
#disabling geo replication for non-linux platforms
- enable_georeplication=no
+ enable_georeplication=no
;;
esac
SYNCDAEMON_COMPILE=0
if test "x$enable_georeplication" != "xno"; then
- SYNCDAEMON_SUBDIR=utils
+ SYNCDAEMON_SUBDIR=geo-replication
SYNCDAEMON_COMPILE=1
BUILD_SYNCDAEMON="yes"
@@ -361,15 +507,67 @@ AC_SUBST(SYNCDAEMON_COMPILE)
AC_SUBST(SYNCDAEMON_SUBDIR)
# end SYNCDAEMON section
-#check if libxml is present if so enable HAVE_LIB_XML
-echo -n "checking if libxml2 is present... "
+# CDC xlator - check if libz is present if so enable HAVE_LIB_Z
+echo -n "checking if libz is present... "
-PKG_CHECK_MODULES([LIBXML2], [libxml-2.0 >= 2.6.19],
- [echo "yes (features requiring libxml2 enabled)" AC_DEFINE(HAVE_LIB_XML, 1, [define if libxml2 is present])],
+PKG_CHECK_MODULES([ZLIB], [zlib >= 1.2.0],
+ [echo "yes (features requiring zlib enabled)" AC_DEFINE(HAVE_LIB_Z, 1, [define if zlib is present])],
[echo "no"] )
-AC_SUBST(LIBXML2_CFLAGS)
-AC_SUBST(LIBXML2_LIBS)
+# PKG_CHECK_MODULES with the ZLIB prefix stores its results in ZLIB_CFLAGS and
+# ZLIB_LIBS; copy them into the LIBZ_* names that are substituted here.
+# NOTE(review): consumers of LIBZ_* are outside this hunk - confirm they
+# expect the pkg-config flags.
+LIBZ_CFLAGS="${ZLIB_CFLAGS}"
+LIBZ_LIBS="${ZLIB_LIBS}"
+AC_SUBST(LIBZ_CFLAGS)
+AC_SUBST(LIBZ_LIBS)
+# end CDC xlator section
+
+# check for systemtap/dtrace
+BUILD_SYSTEMTAP=no
+AC_MSG_CHECKING([whether to include systemtap tracing support])
+AC_ARG_ENABLE([systemtap],
+ [AS_HELP_STRING([--enable-systemtap],
+ [Enable inclusion of systemtap trace support])],
+ [ENABLE_SYSTEMTAP="${enableval}"], [ENABLE_SYSTEMTAP="def"])
+
+AM_CONDITIONAL([ENABLE_SYSTEMTAP], [test "x${ENABLE_SYSTEMTAP}" = "xyes"])
+AC_MSG_RESULT(${ENABLE_SYSTEMTAP})
+
+if test "x${ENABLE_SYSTEMTAP}" != "xno"; then
+ AC_CHECK_PROG(DTRACE, dtrace, "yes", "no")
+ AC_CHECK_HEADER([sys/sdt.h], [SDT_H_FOUND="yes"],
+ [SDT_H_FOUND="no"])
+fi
+
+# With an explicit --enable-systemtap, both dtrace and sys/sdt.h are required.
+if test "x${ENABLE_SYSTEMTAP}" = "xyes"; then
+ if test "x${DTRACE}" = "xno"; then
+ AC_MSG_ERROR([dtrace not found])
+ elif test "x${SDT_H_FOUND}" = "xno"; then
+ AC_MSG_ERROR([systemtap support needs sys/sdt.h header])
+ fi
+fi
+
+# Probes are compiled in only when both dtrace and sys/sdt.h were found.
+if test "x${DTRACE}" = "xyes" -a "x${SDT_H_FOUND}" = "xyes"; then
+ AC_DEFINE([HAVE_SYSTEMTAP], [1], [Define to 1 if using probes.])
+ BUILD_SYSTEMTAP=yes
+fi
+# end of systemtap/dtrace
+
+# xml-output
+AC_ARG_ENABLE([xml-output],
+ AC_HELP_STRING([--disable-xml-output],
+ [Disable the xml output]))
+BUILD_XML_OUTPUT="yes"
+if test "x$enable_xml_output" != "xno"; then
+ #check if libxml is present if so enable HAVE_LIB_XML
+ m4_ifdef([AM_PATH_XML2],[AM_PATH_XML2([2.6.19])], [no_xml=yes])
+ if test "x${no_xml}" = "x"; then
+ AC_DEFINE([HAVE_LIB_XML], [1], [Define to 1 if using libxml2.])
+ else
+ AC_MSG_WARN([libxml2 devel libraries not found disabling XML support])
+ BUILD_XML_OUTPUT="no"
+ fi
+else
+ BUILD_XML_OUTPUT="no"
+fi
+# end of xml-output
dnl FreeBSD > 5 has execinfo as a Ported library for giving a workaround
dnl solution to GCC backtrace functionality
@@ -395,9 +593,9 @@ AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec])
dnl FreeBSD, NetBSD
AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec])
case $host_os in
- *netbsd*)
- CFLAGS=-D_INCOMPLETE_XOPEN_C063
- ;;
+ *netbsd*)
+ # Plain assignment: the += operator is not POSIX sh.
+ CFLAGS="${CFLAGS} -D_INCOMPLETE_XOPEN_C063"
+ ;;
esac
AC_CHECK_FUNC([linkat], [have_linkat=yes])
if test "x${have_linkat}" = "xyes"; then
@@ -405,11 +603,15 @@ if test "x${have_linkat}" = "xyes"; then
fi
AC_SUBST(HAVE_LINKAT)
+dnl check for Monotonic clock
+AC_CHECK_FUNC([clock_gettime], [has_monotonic_clock=yes], AC_CHECK_LIB([rt], [clock_gettime], , AC_MSG_WARN([System doesn't have monotonic clock using contrib])))
+
dnl Check for argp
AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
AC_CONFIG_SUBDIRS(argp-standalone)
+
BUILD_ARGP_STANDALONE=no
-if test "x${ac_cv_header_argp_h}" = "xno"; then
+if test "x${ac_cv_header_argp_h}" = "xno"; then
BUILD_ARGP_STANDALONE=yes
ARGP_STANDALONE_CPPFLAGS='-I${top_srcdir}/argp-standalone'
ARGP_STANDALONE_LDADD='${top_builddir}/argp-standalone/libargp.a'
@@ -430,7 +632,18 @@ if test "x${have_fdatasync}" = "xyes"; then
AC_DEFINE(HAVE_FDATASYNC, 1, [define if fdatasync exists])
fi
-# Check the distribution where you are compiling glusterfs on
+AC_CHECK_FUNC([fallocate], [have_fallocate=yes])
+if test "x${have_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_FALLOCATE, 1, [define if fallocate exists])
+fi
+
+AC_CHECK_FUNC([posix_fallocate], [have_posix_fallocate=yes])
+if test "x${have_posix_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])
+fi
+
+
+# Check the distribution where you are compiling glusterfs on
GF_DISTRIBUTION=
AC_CHECK_FILE([/etc/debian_version])
@@ -452,63 +665,115 @@ AC_SUBST(GF_DISTRIBUTION)
GF_HOST_OS=""
GF_LDFLAGS="-rdynamic"
+# check for gcc -Werror=format-security
+# The flag is probed on its own and appended to the saved CFLAGS only when
+# the compiler accepts it.
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Wformat -Werror=format-security"
+AC_MSG_CHECKING([whether $CC accepts -Werror=format-security])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_format_security=yes], [cc_werror_format_security=no])
+AC_MSG_RESULT([$cc_werror_format_security])
+if test "x$cc_werror_format_security" = "xno"; then
+ CFLAGS="$saved_CFLAGS"
+else
+ CFLAGS="$saved_CFLAGS $CFLAGS"
+fi
+
+# check for gcc -Werror=implicit-function-declaration
+# The flag is probed on its own and appended to the saved CFLAGS only when
+# the compiler accepts it.
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Werror=implicit-function-declaration"
+AC_MSG_CHECKING([whether $CC accepts -Werror=implicit-function-declaration])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_implicit=yes], [cc_werror_implicit=no])
+AC_MSG_RESULT([$cc_werror_implicit])
+if test "x$cc_werror_implicit" = "xno"; then
+ CFLAGS="$saved_CFLAGS"
+else
+ CFLAGS="$saved_CFLAGS $CFLAGS"
+fi
+
case $host_os in
linux*)
- dnl GF_LINUX_HOST_OS=1
GF_HOST_OS="GF_LINUX_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
- ;;
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
+ ;;
solaris*)
GF_HOST_OS="GF_SOLARIS_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -O0 -m64"
- GF_LDFLAGS=""
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_GLUSTERFS_LDFLAGS="-lnsl -lresolv -lsocket"
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -O0 -m64"
+ GF_LDFLAGS=""
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_GLUSTERFS_LIBS="-lnsl -lresolv -lsocket"
BUILD_FUSE_CLIENT=no
FUSE_CLIENT_SUBDIR=""
- ;;
+ ;;
*netbsd*)
- GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_INCOMPLETE_XOPEN_C063"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- GF_FUSE_LDADD="-lperfuse"
- BUILD_FUSE_CLIENT=yes
- LEXLIB=""
- ;;
+ GF_HOST_OS="GF_BSD_HOST_OS"
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_INCOMPLETE_XOPEN_C063"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_GLUSTERFS_LIBS="-lexecinfo"
+ fi
+ GF_FUSE_LDADD="-lperfuse"
+ BUILD_FUSE_CLIENT=yes
+ LEXLIB=""
+ ;;
*bsd*)
GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- BUILD_FUSE_CLIENT=no
- ;;
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_GLUSTERFS_LIBS="-lexecinfo"
+ fi
+ BUILD_FUSE_CLIENT=no
+ ;;
darwin*)
GF_HOST_OS="GF_DARWIN_HOST_OS"
- LIBTOOL=glibtool
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace -D_XOPEN_SOURCE -O0"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
- GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace -O0"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
- ;;
+ LIBTOOL=glibtool
+ GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace -D_XOPEN_SOURCE -O0"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace -O0"
+ GF_LDADD="${ARGP_STANDALONE_LDADD}"
+ GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
+ ;;
esac
+# enable debug section
+AC_ARG_ENABLE([debug],
+ AC_HELP_STRING([--enable-debug],
+ [Enable debug build options.]))
+
+BUILD_DEBUG=no
+if test "x$enable_debug" = "xyes"; then
+ BUILD_DEBUG=yes
+ CFLAGS=`echo $CFLAGS | sed -e s/O2/O0/`
+else
+ BUILD_DEBUG=no
+fi
+AC_SUBST(CFLAGS)
+# end enable debug section
+
+# syslog section
+AC_ARG_ENABLE([syslog],
+ AC_HELP_STRING([--disable-syslog],
+ [Disable syslog for logging]))
+
+USE_SYSLOG="yes"
+if test "x$enable_syslog" != "xno"; then
+ AC_DEFINE(GF_USE_SYSLOG, 1, [Use syslog for logging])
+else
+ USE_SYSLOG="no"
+fi
+AM_CONDITIONAL([ENABLE_SYSLOG], [test x$USE_SYSLOG = xyes])
+#end syslog section
+
BUILD_READLINE=no
AC_CHECK_LIB([readline -lcurses],[readline],[RLLIBS="-lreadline -lcurses"])
AC_CHECK_LIB([readline -ltermcap],[readline],[RLLIBS="-lreadline -ltermcap"])
@@ -519,8 +784,66 @@ if test "x$RLLIBS" != "x"; then
BUILD_READLINE=yes
fi
+BUILD_LIBAIO=no
+AC_CHECK_LIB([aio],[io_setup],[LIBAIO="-laio"])
+
+if test "x$LIBAIO" != "x"; then
+ AC_DEFINE(HAVE_LIBAIO, 1, [libaio based POSIX enabled])
+ BUILD_LIBAIO=yes
+fi
+
+# glupy section
+BUILD_GLUPY=no
+have_python2=no
+have_Python_h=no
+
+AM_PATH_PYTHON()
+if echo $PYTHON_VERSION | grep ^2; then
+ have_python2=yes
+fi
+AC_CHECK_HEADERS([python$PYTHON_VERSION/Python.h],[have_Python_h=yes],[])
+AC_ARG_ENABLE([glupy],
+ AS_HELP_STRING([--enable-glupy],
+ [build glupy]))
+case x$enable_glupy in
+ xyes)
+ if test "x$have_python2" = "xyes" -a "x$have_Python_h" = "xyes"; then
+ BUILD_GLUPY=yes
+ else
+ AC_MSG_ERROR([glupy requires python-devel/python-dev package and python2.x])
+ fi
+ ;;
+ xno)
+ ;;
+ *)
+ if test "x$have_python2" = "xyes" -a "x$have_Python_h" = "xyes"; then
+ BUILD_GLUPY=yes
+ else
+ AC_MSG_WARN([
+ ---------------------------------------------------------------------------------
+ cannot build glupy. python 2.x and python-devel/python-dev package are required.
+ ---------------------------------------------------------------------------------])
+ fi
+ ;;
+esac
+
+if test "x$BUILD_GLUPY" = "xyes"; then
+ BUILD_PYTHON_INC=`$PYTHON -c "from distutils import sysconfig; print sysconfig.get_python_inc()"`
+ BUILD_PYTHON_LIB=python$PYTHON_VERSION
+ GLUPY_SUBDIR=glupy
+ GLUPY_SUBDIR_MAKEFILE=xlators/features/glupy/Makefile
+ GLUPY_SUBDIR_SRC_MAKEFILE=xlators/features/glupy/src/Makefile
+ echo "building glupy with -isystem $BUILD_PYTHON_INC -l $BUILD_PYTHON_LIB"
+ AC_SUBST(BUILD_PYTHON_INC)
+ AC_SUBST(BUILD_PYTHON_LIB)
+ AC_SUBST(GLUPY_SUBDIR)
+ AC_SUBST(GLUPY_SUBDIR_MAKEFILE)
+ AC_SUBST(GLUPY_SUBDIR_SRC_MAKEFILE)
+fi
+# end glupy section
+
AC_SUBST(GF_HOST_OS)
-AC_SUBST(GF_GLUSTERFS_LDFLAGS)
+AC_SUBST([GF_GLUSTERFS_LIBS])
AC_SUBST(GF_GLUSTERFS_CFLAGS)
AC_SUBST(GF_CFLAGS)
AC_SUBST(GF_LDFLAGS)
@@ -528,27 +851,41 @@ AC_SUBST(GF_LDADD)
AC_SUBST(GF_FUSE_LDADD)
AC_SUBST(GF_FUSE_CFLAGS)
AC_SUBST(RLLIBS)
+AC_SUBST(LIBAIO)
AC_SUBST(AM_MAKEFLAGS)
AC_SUBST(AM_LIBTOOLFLAGS)
CONTRIBDIR='$(top_srcdir)/contrib'
AC_SUBST(CONTRIBDIR)
-INCLUDES='-I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/uuid'
-AC_SUBST(INCLUDES)
+GF_CPPDEFINES='-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)'
+GF_CPPINCLUDES='-I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/uuid'
+GF_CPPFLAGS="$GF_CPPDEFINES $GF_CPPINCLUDES"
+AC_SUBST([GF_CPPFLAGS])
AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS")
+AM_CONDITIONAL([GF_INSTALL_VAR_LIB_GLUSTERD], test ! -d ${localstatedir}/lib/glusterd && test -d ${sysconfdir}/glusterd )
+
AC_OUTPUT
echo
echo "GlusterFS configure summary"
echo "==========================="
-echo "FUSE client : $BUILD_FUSE_CLIENT"
-echo "Infiniband verbs : $BUILD_IBVERBS"
-echo "epoll IO multiplex : $BUILD_EPOLL"
-echo "argp-standalone : $BUILD_ARGP_STANDALONE"
-echo "fusermount : $BUILD_FUSERMOUNT"
-echo "readline : $BUILD_READLINE"
-echo "georeplication : $BUILD_SYNCDAEMON"
+echo "FUSE client : $BUILD_FUSE_CLIENT"
+echo "Infiniband verbs : $BUILD_IBVERBS"
+echo "epoll IO multiplex : $BUILD_EPOLL"
+echo "argp-standalone : $BUILD_ARGP_STANDALONE"
+echo "fusermount : $BUILD_FUSERMOUNT"
+echo "readline : $BUILD_READLINE"
+echo "georeplication : $BUILD_SYNCDAEMON"
+echo "Linux-AIO : $BUILD_LIBAIO"
+echo "Enable Debug : $BUILD_DEBUG"
+echo "systemtap : $BUILD_SYSTEMTAP"
+echo "Block Device xlator : $BUILD_BD_XLATOR"
+echo "glupy : $BUILD_GLUPY"
+echo "Use syslog : $USE_SYSLOG"
+echo "XML output : $BUILD_XML_OUTPUT"
+echo "QEMU Block formats : $BUILD_QEMU_BLOCK"
+echo "Encryption xlator : $BUILD_CRYPT_XLATOR"
echo
diff --git a/contrib/aclocal/mkdirp.m4 b/contrib/aclocal/mkdirp.m4
new file mode 100644
index 000000000..d2f7edd5c
--- /dev/null
+++ b/contrib/aclocal/mkdirp.m4
@@ -0,0 +1,146 @@
+# Excerpt from autoconf/autoconf/programs.m4
+# This file is part of Autoconf. -*- Autoconf -*-
+# Checking for programs.
+
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+# Foundation, Inc.
+
+# This file is part of Autoconf. This program is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the Autoconf Configure Script Exception,
+# version 3.0, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License
+# and a copy of the Autoconf Configure Script Exception along with
+# this program; see the files COPYINGv3 and COPYING.EXCEPTION
+# respectively. If not, see <http://www.gnu.org/licenses/>.
+
+# Written by David MacKenzie, with help from
+# Franc,ois Pinard, Karl Berry, Richard Pixley, Ian Lance Taylor,
+# Roland McGrath, Noah Friedman, david d zuhn, and many others.
+
+# AC_PROG_MKDIR_P
+# ---------------
+# Check whether `mkdir -p' is known to be thread-safe, and fall back to
+# install-sh -d otherwise.
+#
+# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories
+# created by `make install' are always world readable, even if the
+# installer happens to have an overly restrictive umask (e.g. 077).
+# This was a mistake. There are at least two reasons why we must not
+# use `-m 0755':
+# - it causes special bits like SGID to be ignored,
+# - it may be too restrictive (some setups expect 775 directories).
+#
+# Do not use -m 0755 and let people choose whatever they expect by
+# setting umask.
+#
+# We cannot accept just any implementation of `mkdir' that recognizes `-p'.
+# Some implementations (such as Solaris 8's) are vulnerable to race conditions:
+# if a parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c'
+# concurrently, both versions can detect that a/ is missing, but only
+# one can create it and the other will error out. Consequently we
+# restrict ourselves to known race-free implementations.
+#
+# Automake used to define mkdir_p as `mkdir -p .', in order to
+# allow $(mkdir_p) to be used without argument. As in
+# $(mkdir_p) $(somedir)
+# where $(somedir) is conditionally defined. However we don't do
+# that for MKDIR_P.
+# 1. before we restricted the check to GNU mkdir, `mkdir -p .' was
+# reported to fail in read-only directories. The system where this
+# happened has been forgotten.
+# 2. in practice we call $(MKDIR_P) on directories such as
+# $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# and we don't want to create $(DESTDIR) if $(somedir) is empty.
+# To support the latter case, we have to write
+# test -z "$(somedir)" || $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# so $(MKDIR_P) always has an argument.
+# We will have better chances of detecting a missing test if
+# $(MKDIR_P) complains about missing arguments.
+# 3. $(MKDIR_P) is named after `mkdir -p' and we don't expect this
+# to accept no argument.
+# 4. having something like `mkdir .' in the output is unsightly.
+#
+# On NextStep and OpenStep, the `mkdir' command does not
+# recognize any option. It will interpret all options as
+# directories to create.
+AN_MAKEVAR([MKDIR_P], [AC_PROG_MKDIR_P])
+AC_DEFUN_ONCE([AC_PROG_MKDIR_P],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+
+AC_MSG_CHECKING([for a thread-safe mkdir -p])
+if test -z "$MKDIR_P"; then
+ AC_CACHE_VAL([ac_cv_path_mkdir],
+ [_AS_PATH_WALK([$PATH$PATH_SEPARATOR/opt/sfw/bin],
+ [for ac_prog in mkdir gmkdir; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ AS_EXECUTABLE_P(["$as_dir/$ac_prog$ac_exec_ext"]) || continue
+ case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+ 'mkdir (GNU coreutils) '* | \
+ 'mkdir (coreutils) '* | \
+ 'mkdir (fileutils) '4.1*)
+ ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+ break 3;;
+ esac
+ done
+ done])])
+ test -d ./--version && rmdir ./--version
+ if test "${ac_cv_path_mkdir+set}" = set; then
+ MKDIR_P="$ac_cv_path_mkdir -p"
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for MKDIR_P within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ MKDIR_P="$ac_install_sh -d"
+ fi
+fi
+dnl status.m4 does special magic for MKDIR_P instead of AC_SUBST,
+dnl to get relative names right. However, also AC_SUBST here so
+dnl that Automake versions before 1.10 will pick it up (they do not
+dnl trace AC_SUBST_TRACE).
+dnl FIXME: Remove this once we drop support for Automake < 1.10.
+AC_SUBST([MKDIR_P])dnl
+AC_MSG_RESULT([$MKDIR_P])
+])# AC_PROG_MKDIR_P
+
+
+# From automake/m4/mkdirp.m4
+## -*- Autoconf -*-
+# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_MKDIR_P
+# ---------------
+# Check for `mkdir -p'.
+AC_DEFUN([AM_PROG_MKDIR_P],
+[
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
+dnl while keeping a definition of mkdir_p for backward compatibility.
+dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
+dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
+dnl Makefile.ins that do not define MKDIR_P, so we do our own
+dnl adjustment using top_builddir (which is defined more often than
+dnl MKDIR_P).
+AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
+case $mkdir_p in
+ [[\\/$]]* | ?:[[\\/]]*) ;;
+ */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
+esac
+])
diff --git a/contrib/aclocal/python.m4 b/contrib/aclocal/python.m4
new file mode 100644
index 000000000..a39a90090
--- /dev/null
+++ b/contrib/aclocal/python.m4
@@ -0,0 +1,209 @@
+## ------------------------ -*- Autoconf -*-
+## Python file handling
+## From Andrew Dalke
+## Updated by James Henstridge
+## ------------------------
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PATH_PYTHON([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# ---------------------------------------------------------------------------
+# Adds support for distributing Python modules and packages. To
+# install modules, copy them to $(pythondir), using the python_PYTHON
+# automake variable. To install a package with the same name as the
+# automake package, install to $(pkgpythondir), or use the
+# pkgpython_PYTHON automake variable.
+#
+# The variables $(pyexecdir) and $(pkgpyexecdir) are provided as
+# locations to install python extension modules (shared libraries).
+# Another macro is required to find the appropriate flags to compile
+# extension modules.
+#
+# If your package is configured with a different prefix to python,
+# users will have to add the install directory to the PYTHONPATH
+# environment variable, or create a .pth file (see the python
+# documentation for details).
+#
+# If the MINIMUM-VERSION argument is passed, AM_PATH_PYTHON will
+# cause an error if the version of python installed on the system
+# doesn't meet the requirement. MINIMUM-VERSION should consist of
+# numbers and dots only.
+AC_DEFUN([AM_PATH_PYTHON],
+ [
+ dnl Find a Python interpreter. Python versions prior to 2.0 are not
+ dnl supported. (2.0 was released on October 16, 2000).
+ m4_define_default([_AM_PYTHON_INTERPRETER_LIST],
+ [python python2 python3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 dnl
+python2.1 python2.0])
+
+ m4_if([$1],[],[
+ dnl No version check is needed.
+ # Find any Python interpreter.
+ if test -z "$PYTHON"; then
+ AC_PATH_PROGS([PYTHON], _AM_PYTHON_INTERPRETER_LIST, :)
+ fi
+ am_display_PYTHON=python
+ ], [
+ dnl A version check is needed.
+ if test -n "$PYTHON"; then
+ # If the user set $PYTHON, use it and don't search for anything else.
+ AC_MSG_CHECKING([whether $PYTHON version >= $1])
+ AM_PYTHON_CHECK_VERSION([$PYTHON], [$1],
+ [AC_MSG_RESULT(yes)],
+ [AC_MSG_ERROR(too old)])
+ am_display_PYTHON=$PYTHON
+ else
+ # Otherwise, try each interpreter until we find one that satisfies
+ # VERSION.
+ AC_CACHE_CHECK([for a Python interpreter with version >= $1],
+ [am_cv_pathless_PYTHON],[
+ for am_cv_pathless_PYTHON in _AM_PYTHON_INTERPRETER_LIST none; do
+ test "$am_cv_pathless_PYTHON" = none && break
+ AM_PYTHON_CHECK_VERSION([$am_cv_pathless_PYTHON], [$1], [break])
+ done])
+ # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON.
+ if test "$am_cv_pathless_PYTHON" = none; then
+ PYTHON=:
+ else
+ AC_PATH_PROG([PYTHON], [$am_cv_pathless_PYTHON])
+ fi
+ am_display_PYTHON=$am_cv_pathless_PYTHON
+ fi
+ ])
+
+ if test "$PYTHON" = :; then
+ dnl Run any user-specified action, or abort.
+ m4_default([$3], [AC_MSG_ERROR([no suitable Python interpreter found])])
+ else
+
+ dnl Query Python for its version number. Getting [:3] seems to be
+ dnl the best way to do this; it's what "site.py" does in the standard
+ dnl library.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON version], [am_cv_python_version],
+ [am_cv_python_version=`$PYTHON -c "import sys; sys.stdout.write(sys.version[[:3]])"`])
+ AC_SUBST([PYTHON_VERSION], [$am_cv_python_version])
+
+ dnl Use the values of $prefix and $exec_prefix for the corresponding
+ dnl values of PYTHON_PREFIX and PYTHON_EXEC_PREFIX. These are made
+ dnl distinct variables so they can be overridden if need be. However,
+ dnl general consensus is that you shouldn't need this ability.
+
+ AC_SUBST([PYTHON_PREFIX], ['${prefix}'])
+ AC_SUBST([PYTHON_EXEC_PREFIX], ['${exec_prefix}'])
+
+ dnl At times (like when building shared libraries) you may want
+ dnl to know which OS platform Python thinks this is.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON platform], [am_cv_python_platform],
+ [am_cv_python_platform=`$PYTHON -c "import sys; sys.stdout.write(sys.platform)"`])
+ AC_SUBST([PYTHON_PLATFORM], [$am_cv_python_platform])
+
+
+ dnl Set up 4 directories:
+
+ dnl pythondir -- where to install python scripts. This is the
+ dnl site-packages directory, not the python standard library
+ dnl directory like in previous automake betas. This behavior
+ dnl is more consistent with lispdir.m4 for example.
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON script directory],
+ [am_cv_python_pythondir],
+ [if test "x$prefix" = xNONE
+ then
+ am_py_prefix=$ac_default_prefix
+ else
+ am_py_prefix=$prefix
+ fi
+ am_cv_python_pythondir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(0,0,prefix='$am_py_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pythondir in
+ $am_py_prefix*)
+ am__strip_prefix=`echo "$am_py_prefix" | sed 's|.|.|g'`
+ am_cv_python_pythondir=`echo "$am_cv_python_pythondir" | sed "s,^$am__strip_prefix,$PYTHON_PREFIX,"`
+ ;;
+ *)
+ case $am_py_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pythondir=$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pythondir], [$am_cv_python_pythondir])
+
+ dnl pkgpythondir -- $PACKAGE directory under pythondir. Was
+ dnl PYTHON_SITE_PACKAGE in previous betas, but this naming is
+ dnl more consistent with the rest of automake.
+
+ AC_SUBST([pkgpythondir], [\${pythondir}/$PACKAGE])
+
+ dnl pyexecdir -- directory for installing python extension modules
+ dnl (shared libraries)
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON extension module directory],
+ [am_cv_python_pyexecdir],
+ [if test "x$exec_prefix" = xNONE
+ then
+ am_py_exec_prefix=$am_py_prefix
+ else
+ am_py_exec_prefix=$exec_prefix
+ fi
+ am_cv_python_pyexecdir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(1,0,prefix='$am_py_exec_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pyexecdir in
+ $am_py_exec_prefix*)
+ am__strip_prefix=`echo "$am_py_exec_prefix" | sed 's|.|.|g'`
+ am_cv_python_pyexecdir=`echo "$am_cv_python_pyexecdir" | sed "s,^$am__strip_prefix,$PYTHON_EXEC_PREFIX,"`
+ ;;
+ *)
+ case $am_py_exec_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pyexecdir=$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pyexecdir], [$am_cv_python_pyexecdir])
+
+ dnl pkgpyexecdir -- $(pyexecdir)/$(PACKAGE)
+
+ AC_SUBST([pkgpyexecdir], [\${pyexecdir}/$PACKAGE])
+
+ dnl Run any user-specified action.
+ $2
+ fi
+
+])
+
+
+# AM_PYTHON_CHECK_VERSION(PROG, VERSION, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# ---------------------------------------------------------------------------
+# Run ACTION-IF-TRUE if the Python interpreter PROG has version >= VERSION.
+# Run ACTION-IF-FALSE otherwise.
+# This test uses sys.hexversion instead of the string equivalent (first
+# word of sys.version), in order to cope with versions such as 2.2c1.
+# This supports Python 2.0 or higher. (2.0 was released on October 16, 2000).
+AC_DEFUN([AM_PYTHON_CHECK_VERSION],
+ [prog="import sys
+# split strings by '.' and convert to numeric. Append some zeros
+# because we need at least 4 digits for the hex conversion.
+# map returns an iterator in Python 3.0 and a list in 2.x
+minver = list(map(int, '$2'.split('.'))) + [[0, 0, 0]]
+minverhex = 0
+# xrange is not present in Python 3.0 and range returns an iterator
+for i in list(range(0, 4)): minverhex = (minverhex << 8) + minver[[i]]
+sys.exit(sys.hexversion < minverhex)"
+ AS_IF([AM_RUN_LOG([$1 -c "$prog"])], [$3], [$4])])
diff --git a/contrib/fuse-include/fuse-mount.h b/contrib/fuse-include/fuse-mount.h
index 7a3756d92..9358ac810 100644
--- a/contrib/fuse-include/fuse-mount.h
+++ b/contrib/fuse-include/fuse-mount.h
@@ -8,5 +8,6 @@
*/
void gf_fuse_unmount (const char *mountpoint, int fd);
-int gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
+int gf_fuse_mount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
pid_t *mtab_pid, int status_fd);
diff --git a/contrib/fuse-include/fuse_kernel.h b/contrib/fuse-include/fuse_kernel.h
index 9ae25d6f9..60bb2f9f7 100644
--- a/contrib/fuse-include/fuse_kernel.h
+++ b/contrib/fuse-include/fuse_kernel.h
@@ -60,23 +60,75 @@
* 7.13
* - make max number of background requests and congestion threshold
* tunables
+ *
+ * 7.14
+ * - add splice support to fuse device
+ *
+ * 7.15
+ * - add store notify
+ * - add retrieve notify
+ *
+ * 7.16
+ * - add BATCH_FORGET request
+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ * - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ * - add FUSE_IOCTL_DIR flag
+ * - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ * - add FUSE_FALLOCATE
+ *
+ * 7.20
+ * - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ * - add FUSE_READDIRPLUS
+ * - send the requested events in POLL request
+ *
+ * 7.22
+ * - add FUSE_ASYNC_DIO
*/
#ifndef _LINUX_FUSE_H
#define _LINUX_FUSE_H
-#include <sys/types.h>
-#define __u64 uint64_t
-#define __s64 int64_t
-#define __u32 uint32_t
-#define __s32 int32_t
-#define __u16 uint16_t
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
/** Version number of this interface */
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 13
+#define FUSE_KERNEL_MINOR_VERSION 22
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -85,42 +137,42 @@
userspace works under 64bit kernels */
struct fuse_attr {
- __u64 ino;
- __u64 size;
- __u64 blocks;
- __u64 atime;
- __u64 mtime;
- __u64 ctime;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 ctimensec;
- __u32 mode;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 rdev;
- __u32 blksize;
- __u32 padding;
+ uint64_t ino;
+ uint64_t size;
+ uint64_t blocks;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t nlink;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t rdev;
+ uint32_t blksize;
+ uint32_t padding;
};
struct fuse_kstatfs {
- __u64 blocks;
- __u64 bfree;
- __u64 bavail;
- __u64 files;
- __u64 ffree;
- __u32 bsize;
- __u32 namelen;
- __u32 frsize;
- __u32 padding;
- __u32 spare[6];
+ uint64_t blocks;
+ uint64_t bfree;
+ uint64_t bavail;
+ uint64_t files;
+ uint64_t ffree;
+ uint32_t bsize;
+ uint32_t namelen;
+ uint32_t frsize;
+ uint32_t padding;
+ uint32_t spare[6];
};
struct fuse_file_lock {
- __u64 start;
- __u64 end;
- __u32 type;
- __u32 pid; /* tgid */
+ uint64_t start;
+ uint64_t end;
+ uint32_t type;
+ uint32_t pid; /* tgid */
};
/**
@@ -151,8 +203,22 @@ struct fuse_file_lock {
/**
* INIT request/reply flags
*
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
* FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
* FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -161,6 +227,15 @@ struct fuse_file_lock {
#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
#define FUSE_DONT_MASK (1 << 6)
+#define FUSE_SPLICE_WRITE (1 << 7)
+#define FUSE_SPLICE_MOVE (1 << 8)
+#define FUSE_SPLICE_READ (1 << 9)
+#define FUSE_FLOCK_LOCKS (1 << 10)
+#define FUSE_HAS_IOCTL_DIR (1 << 11)
+#define FUSE_AUTO_INVAL_DATA (1 << 12)
+#define FUSE_DO_READDIRPLUS (1 << 13)
+#define FUSE_READDIRPLUS_AUTO (1 << 14)
+#define FUSE_ASYNC_DIO (1 << 15)
/**
* CUSE INIT request/reply flags
@@ -173,6 +248,7 @@ struct fuse_file_lock {
* Release flags
*/
#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
/**
* Getattr flags
@@ -204,12 +280,16 @@ struct fuse_file_lock {
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
* FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
* FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
+#define FUSE_IOCTL_32BIT (1 << 3)
+#define FUSE_IOCTL_DIR (1 << 4)
#define FUSE_IOCTL_MAX_IOV 256
@@ -259,6 +339,10 @@ enum fuse_opcode {
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -268,6 +352,9 @@ enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
FUSE_NOTIFY_INVAL_INODE = 2,
FUSE_NOTIFY_INVAL_ENTRY = 3,
+ FUSE_NOTIFY_STORE = 4,
+ FUSE_NOTIFY_RETRIEVE = 5,
+ FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_CODE_MAX,
};
@@ -277,133 +364,143 @@ enum fuse_notify_code {
#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
struct fuse_entry_out {
- __u64 nodeid; /* Inode ID */
- __u64 generation; /* Inode generation: nodeid:gen must
- be unique for the fs's lifetime */
- __u64 entry_valid; /* Cache timeout for the name */
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 entry_valid_nsec;
- __u32 attr_valid_nsec;
+ uint64_t nodeid; /* Inode ID */
+ uint64_t generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ uint64_t entry_valid; /* Cache timeout for the name */
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t entry_valid_nsec;
+ uint32_t attr_valid_nsec;
struct fuse_attr attr;
};
struct fuse_forget_in {
- __u64 nlookup;
+ uint64_t nlookup;
+};
+
+struct fuse_forget_one {
+ uint64_t nodeid;
+ uint64_t nlookup;
+};
+
+struct fuse_batch_forget_in {
+ uint32_t count;
+ uint32_t dummy;
};
struct fuse_getattr_in {
- __u32 getattr_flags;
- __u32 dummy;
- __u64 fh;
+ uint32_t getattr_flags;
+ uint32_t dummy;
+ uint64_t fh;
};
#define FUSE_COMPAT_ATTR_OUT_SIZE 96
struct fuse_attr_out {
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 attr_valid_nsec;
- __u32 dummy;
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t attr_valid_nsec;
+ uint32_t dummy;
struct fuse_attr attr;
};
#define FUSE_COMPAT_MKNOD_IN_SIZE 8
struct fuse_mknod_in {
- __u32 mode;
- __u32 rdev;
- __u32 umask;
- __u32 padding;
+ uint32_t mode;
+ uint32_t rdev;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_mkdir_in {
- __u32 mode;
- __u32 umask;
+ uint32_t mode;
+ uint32_t umask;
};
struct fuse_rename_in {
- __u64 newdir;
+ uint64_t newdir;
};
struct fuse_link_in {
- __u64 oldnodeid;
+ uint64_t oldnodeid;
};
struct fuse_setattr_in {
- __u32 valid;
- __u32 padding;
- __u64 fh;
- __u64 size;
- __u64 lock_owner;
- __u64 atime;
- __u64 mtime;
- __u64 unused2;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 unused3;
- __u32 mode;
- __u32 unused4;
- __u32 uid;
- __u32 gid;
- __u32 unused5;
+ uint32_t valid;
+ uint32_t padding;
+ uint64_t fh;
+ uint64_t size;
+ uint64_t lock_owner;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t unused2;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t unused3;
+ uint32_t mode;
+ uint32_t unused4;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t unused5;
};
struct fuse_open_in {
- __u32 flags;
- __u32 unused;
+ uint32_t flags;
+ uint32_t unused;
};
struct fuse_create_in {
- __u32 flags;
- __u32 mode;
- __u32 umask;
- __u32 padding;
+ uint32_t flags;
+ uint32_t mode;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_open_out {
- __u64 fh;
- __u32 open_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t open_flags;
+ uint32_t padding;
};
struct fuse_release_in {
- __u64 fh;
- __u32 flags;
- __u32 release_flags;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t release_flags;
+ uint64_t lock_owner;
};
struct fuse_flush_in {
- __u64 fh;
- __u32 unused;
- __u32 padding;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t unused;
+ uint32_t padding;
+ uint64_t lock_owner;
};
struct fuse_read_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 read_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t read_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
#define FUSE_COMPAT_WRITE_IN_SIZE 24
struct fuse_write_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 write_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t write_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
struct fuse_write_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
@@ -413,32 +510,32 @@ struct fuse_statfs_out {
};
struct fuse_fsync_in {
- __u64 fh;
- __u32 fsync_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t fsync_flags;
+ uint32_t padding;
};
struct fuse_setxattr_in {
- __u32 size;
- __u32 flags;
+ uint32_t size;
+ uint32_t flags;
};
struct fuse_getxattr_in {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_getxattr_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_lk_in {
- __u64 fh;
- __u64 owner;
+ uint64_t fh;
+ uint64_t owner;
struct fuse_file_lock lk;
- __u32 lk_flags;
- __u32 padding;
+ uint32_t lk_flags;
+ uint32_t padding;
};
struct fuse_lk_out {
@@ -446,134 +543,190 @@ struct fuse_lk_out {
};
struct fuse_access_in {
- __u32 mask;
- __u32 padding;
+ uint32_t mask;
+ uint32_t padding;
};
struct fuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
};
struct fuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
- __u16 max_background;
- __u16 congestion_threshold;
- __u32 max_write;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
+ uint16_t max_background;
+ uint16_t congestion_threshold;
+ uint32_t max_write;
};
#define CUSE_INIT_INFO_MAX 4096
struct cuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
};
struct cuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
- __u32 max_read;
- __u32 max_write;
- __u32 dev_major; /* chardev major */
- __u32 dev_minor; /* chardev minor */
- __u32 spare[10];
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
+ uint32_t max_read;
+ uint32_t max_write;
+ uint32_t dev_major; /* chardev major */
+ uint32_t dev_minor; /* chardev minor */
+ uint32_t spare[10];
};
struct fuse_interrupt_in {
- __u64 unique;
+ uint64_t unique;
};
struct fuse_bmap_in {
- __u64 block;
- __u32 blocksize;
- __u32 padding;
+ uint64_t block;
+ uint32_t blocksize;
+ uint32_t padding;
};
struct fuse_bmap_out {
- __u64 block;
+ uint64_t block;
};
struct fuse_ioctl_in {
- __u64 fh;
- __u32 flags;
- __u32 cmd;
- __u64 arg;
- __u32 in_size;
- __u32 out_size;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t cmd;
+ uint64_t arg;
+ uint32_t in_size;
+ uint32_t out_size;
+};
+
+struct fuse_ioctl_iovec {
+ uint64_t base;
+ uint64_t len;
};
struct fuse_ioctl_out {
- __s32 result;
- __u32 flags;
- __u32 in_iovs;
- __u32 out_iovs;
+ int32_t result;
+ uint32_t flags;
+ uint32_t in_iovs;
+ uint32_t out_iovs;
};
struct fuse_poll_in {
- __u64 fh;
- __u64 kh;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t kh;
+ uint32_t flags;
+ uint32_t events;
};
struct fuse_poll_out {
- __u32 revents;
- __u32 padding;
+ uint32_t revents;
+ uint32_t padding;
};
struct fuse_notify_poll_wakeup_out {
- __u64 kh;
+ uint64_t kh;
+};
+
+struct fuse_fallocate_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint64_t length;
+ uint32_t mode;
+ uint32_t padding;
};
struct fuse_in_header {
- __u32 len;
- __u32 opcode;
- __u64 unique;
- __u64 nodeid;
- __u32 uid;
- __u32 gid;
- __u32 pid;
- __u32 padding;
+ uint32_t len;
+ uint32_t opcode;
+ uint64_t unique;
+ uint64_t nodeid;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t pid;
+ uint32_t padding;
};
struct fuse_out_header {
- __u32 len;
- __s32 error;
- __u64 unique;
+ uint32_t len;
+ int32_t error;
+ uint64_t unique;
};
struct fuse_dirent {
- __u64 ino;
- __u64 off;
- __u32 namelen;
- __u32 type;
- char name[0];
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+struct fuse_direntplus {
+ struct fuse_entry_out entry_out;
+ struct fuse_dirent dirent;
+};
+
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+ offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
struct fuse_notify_inval_inode_out {
- __u64 ino;
- __s64 off;
- __s64 len;
+ uint64_t ino;
+ int64_t off;
+ int64_t len;
};
struct fuse_notify_inval_entry_out {
- __u64 parent;
- __u32 namelen;
- __u32 padding;
+ uint64_t parent;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_delete_out {
+ uint64_t parent;
+ uint64_t child;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_store_out {
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_notify_retrieve_out {
+ uint64_t notify_unique;
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+ uint64_t dummy1;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t dummy2;
+ uint64_t dummy3;
+ uint64_t dummy4;
};
#endif /* _LINUX_FUSE_H */
diff --git a/contrib/fuse-lib/mount-gluster-compat.h b/contrib/fuse-lib/mount-gluster-compat.h
index 17c11e789..4fc20623b 100644
--- a/contrib/fuse-lib/mount-gluster-compat.h
+++ b/contrib/fuse-lib/mount-gluster-compat.h
@@ -33,6 +33,7 @@
#ifdef __NetBSD__
#include <perfuse.h>
#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#define MS_RDONLY MNT_RDONLY
#endif
#ifdef linux
diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c
index f02a835b3..922d9e464 100644
--- a/contrib/fuse-lib/mount.c
+++ b/contrib/fuse-lib/mount.c
@@ -100,7 +100,8 @@ escape (char *s)
}
static int
-fuse_mount_fusermount (const char *mountpoint, char *fsname, char *mnt_param,
+fuse_mount_fusermount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
int fd)
{
int pid = -1;
@@ -124,7 +125,8 @@ fuse_mount_fusermount (const char *mountpoint, char *fsname, char *mnt_param,
return -1;
}
ret = asprintf (&fm_mnt_params,
- "%s,fsname=%s,nonempty,subtype=glusterfs",
+ "%s%s,fsname=%s,nonempty,subtype=glusterfs",
+ (mountflags & MS_RDONLY) ? "ro," : "",
mnt_param, efsname);
FREE (efsname);
if (ret == -1) {
@@ -169,7 +171,8 @@ fuse_mount_fusermount (const char *mountpoint, char *fsname, char *mnt_param,
}
static int
-fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
+fuse_mount_sys (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param, int fd)
{
int ret = -1;
unsigned mounted = 0;
@@ -185,7 +188,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
if (ret == -1 && errno == ENODEV) {
/* fs subtype support was added by 79c0b2df aka
@@ -198,7 +201,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
}
if (ret == -1)
@@ -209,6 +212,7 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
#ifndef __NetBSD__
if (geteuid () == 0) {
char *newmnt = fuse_mnt_resolve_path ("fuse", mountpoint);
+ char *mnt_param_mtab = NULL;
if (!newmnt) {
ret = -1;
@@ -216,8 +220,17 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
- ret = fuse_mnt_add_mount ("fuse", source, newmnt, fstype,
- mnt_param);
+ ret = asprintf (&mnt_param_mtab, "%s%s",
+ mountflags & MS_RDONLY ? "ro," : "",
+ mnt_param);
+ if (ret == -1)
+ GFFUSE_LOGERR ("Out of memory");
+ else {
+ ret = fuse_mnt_add_mount ("fuse", source, newmnt,
+ fstype, mnt_param_mtab);
+ FREE (mnt_param_mtab);
+ }
+
FREE (newmnt);
if (ret == -1) {
GFFUSE_LOGERR ("failed to add mtab entry");
@@ -240,7 +253,8 @@ out:
}
int
-gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
+gf_fuse_mount (const char *mountpoint, char *fsname,
+ unsigned long mountflags, char *mnt_param,
pid_t *mnt_pid, int status_fd)
{
int fd = -1;
@@ -268,19 +282,20 @@ gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
exit (pid == -1 ? 1 : 0);
}
- ret = fuse_mount_sys (mountpoint, fsname, mnt_param, fd);
+ ret = fuse_mount_sys (mountpoint, fsname, mountflags, mnt_param, fd);
if (ret == -1) {
gf_log ("glusterfs-fuse", GF_LOG_INFO,
"direct mount failed (%s), "
"retry to mount via fusermount",
strerror (errno));
- ret = fuse_mount_fusermount (mountpoint, fsname,
+ ret = fuse_mount_fusermount (mountpoint, fsname, mountflags,
mnt_param, fd);
}
if (ret == -1)
- GFFUSE_LOGERR ("mount failed");
+ GFFUSE_LOGERR ("mount of %s to %s (%s) failed",
+ fsname, mountpoint, mnt_param);
if (status_fd >= 0)
(void)write (status_fd, &ret, sizeof (ret));
diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am
index 6e9b31c77..971d3d220 100644
--- a/contrib/fuse-util/Makefile.am
+++ b/contrib/fuse-util/Makefile.am
@@ -3,7 +3,9 @@ bin_PROGRAMS = fusermount-glusterfs
fusermount_glusterfs_SOURCES = fusermount.c mount_util.c $(CONTRIBDIR)/fuse-lib/mount-common.c
noinst_HEADERS = $(CONTRIBDIR)/fuse-include/mount_util.h
-AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -DFUSE_UTIL $(GF_CFLAGS) -D_GNU_SOURCE -I$(CONTRIBDIR)/fuse-include
+AM_CPPFLAGS = $(GF_CPPFLAGS) -DFUSE_UTIL -I$(CONTRIBDIR)/fuse-include
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
install-exec-hook:
-chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
diff --git a/contrib/fuse-util/fusermount.c b/contrib/fuse-util/fusermount.c
index 9a8952571..0ff8d9039 100644
--- a/contrib/fuse-util/fusermount.c
+++ b/contrib/fuse-util/fusermount.c
@@ -505,13 +505,13 @@ static void read_conf(void)
int isnewline = 1;
while (fgets(line, sizeof(line), fp) != NULL) {
if (isnewline) {
- if (line[strlen(line)-1] == '\n') {
+ if (strlen(line) && line[strlen(line)-1] == '\n') {
strip_line(line);
parse_line(line, linenum);
} else {
isnewline = 0;
}
- } else if(line[strlen(line)-1] == '\n') {
+ } else if(strlen(line) && line[strlen(line)-1] == '\n') {
fprintf(stderr, "%s: reading %s: line %i too long\n", progname, FUSE_CONF, linenum);
isnewline = 1;
@@ -606,7 +606,7 @@ static int add_option(char **optsp, const char *opt, unsigned expand)
static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
{
int i;
- int l;
+ size_t l;
if (!(flags & MS_RDONLY) && add_option(mnt_optsp, "rw", 0) == -1)
return -1;
@@ -621,7 +621,7 @@ static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
return -1;
/* remove comma from end of opts*/
l = strlen(*mnt_optsp);
- if ((*mnt_optsp)[l-1] == ',')
+ if (l && (*mnt_optsp)[l-1] == ',')
(*mnt_optsp)[l-1] = '\0';
if (getuid() != 0) {
const char *user = get_user_name();
@@ -650,8 +650,7 @@ static int get_string_opt(const char *s, unsigned len, const char *opt,
unsigned opt_len = strlen(opt);
char *d;
- if (*val)
- free(*val);
+ free(*val);
*val = (char *) malloc(len - opt_len + 1);
if (!*val) {
fprintf(stderr, "%s: failed to allocate memory\n", progname);
diff --git a/contrib/libgen/basename_r.c b/contrib/libgen/basename_r.c
index e3fae60d1..2c3a87afe 100644
--- a/contrib/libgen/basename_r.c
+++ b/contrib/libgen/basename_r.c
@@ -1,7 +1,10 @@
/*
- * borrowed from glibc-2.12.1/string/basename.c
+ * borrowed from glibc-2.12.1/string/basename.c
* Modified to return "." for NULL or "", as required for SUSv2.
*/
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_BASENAME
/* Return the name-within-directory of a file name.
Copyright (C) 1996,97,98,2002 Free Software Foundation, Inc.
@@ -34,3 +37,4 @@ basename_r (filename)
p = strrchr (filename, '/');
return p ? p + 1 : (char *) filename;
}
+#endif /* THREAD_UNSAFE_BASENAME */
diff --git a/contrib/libgen/dirname_r.c b/contrib/libgen/dirname_r.c
index 78fe0ee8c..131cbcf2a 100644
--- a/contrib/libgen/dirname_r.c
+++ b/contrib/libgen/dirname_r.c
@@ -4,6 +4,9 @@
* Removed code for long bigger than 32 bytes, renamed __ptr_t as void *
* changed reg_char type to char.
*/
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_DIRNAME
/* memrchr -- find the last occurrence of a byte in a memory block
Copyright (C) 1991, 93, 96, 97, 99, 2000 Free Software Foundation, Inc.
@@ -237,3 +240,4 @@ dirname_r (char *path)
return path;
}
+#endif /* THREAD_UNSAFE_DIRNAME */
diff --git a/contrib/qemu/block.c b/contrib/qemu/block.c
new file mode 100644
index 000000000..b56024113
--- /dev/null
+++ b/contrib/qemu/block.c
@@ -0,0 +1,4604 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor/monitor.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "qemu/module.h"
+#include "qapi/qmp/qjson.h"
+#include "sysemu/sysemu.h"
+#include "qemu/notify.h"
+#include "block/coroutine.h"
+#include "qmp-commands.h"
+#include "qemu/timer.h"
+
+#ifdef CONFIG_BSD
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/queue.h>
+#ifndef __DragonFly__
+#include <sys/disk.h>
+#endif
+#endif
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+typedef enum {
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
+} BdrvRequestFlags;
+
+static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov);
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque,
+ bool is_write);
+static void coroutine_fn bdrv_co_do_rw(void *opaque);
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+ double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, int64_t *wait);
+
+static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
+ QTAILQ_HEAD_INITIALIZER(bdrv_states);
+
+static QLIST_HEAD(, BlockDriver) bdrv_drivers =
+ QLIST_HEAD_INITIALIZER(bdrv_drivers);
+
+/* If non-zero, use only whitelisted block drivers */
+static int use_bdrv_whitelist;
+
+#ifdef _WIN32
+static int is_windows_drive_prefix(const char *filename)
+{
+ return (((filename[0] >= 'a' && filename[0] <= 'z') ||
+ (filename[0] >= 'A' && filename[0] <= 'Z')) &&
+ filename[1] == ':');
+}
+
+int is_windows_drive(const char *filename)
+{
+ if (is_windows_drive_prefix(filename) &&
+ filename[2] == '\0')
+ return 1;
+ if (strstart(filename, "\\\\.\\", NULL) ||
+ strstart(filename, "//./", NULL))
+ return 1;
+ return 0;
+}
+#endif
+
+/* throttling disk I/O limits */
+void bdrv_io_limits_disable(BlockDriverState *bs)
+{
+ bs->io_limits_enabled = false;
+
+ while (qemu_co_queue_next(&bs->throttled_reqs));
+
+ if (bs->block_timer) {
+ qemu_del_timer(bs->block_timer);
+ qemu_free_timer(bs->block_timer);
+ bs->block_timer = NULL;
+ }
+
+ bs->slice_start = 0;
+ bs->slice_end = 0;
+}
+
+static void bdrv_block_timer(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+
+ qemu_co_queue_next(&bs->throttled_reqs);
+}
+
+void bdrv_io_limits_enable(BlockDriverState *bs)
+{
+ qemu_co_queue_init(&bs->throttled_reqs);
+ bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
+ bs->io_limits_enabled = true;
+}
+
+bool bdrv_io_limits_enabled(BlockDriverState *bs)
+{
+ BlockIOLimit *io_limits = &bs->io_limits;
+ return io_limits->bps[BLOCK_IO_LIMIT_READ]
+ || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
+ || io_limits->iops[BLOCK_IO_LIMIT_READ]
+ || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
+ || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
+}
+
+static void bdrv_io_limits_intercept(BlockDriverState *bs,
+ bool is_write, int nb_sectors)
+{
+ int64_t wait_time = -1;
+
+ if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
+ qemu_co_queue_wait(&bs->throttled_reqs);
+ }
+
+ /* We want to service requests in FIFO order. The next throttled
+ * request is not dequeued until the current request is allowed to be
+ * serviced. So if the current request still exceeds the limits, it is
+ * re-inserted at the head of the queue, and all requests that follow it
+ * remain in the throttled_reqs queue.
+ */
+
+ while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
+ qemu_mod_timer(bs->block_timer,
+ wait_time + qemu_get_clock_ns(vm_clock));
+ qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
+ }
+
+ qemu_co_queue_next(&bs->throttled_reqs);
+}
+
+/* check if the path starts with "<protocol>:" */
+static int path_has_protocol(const char *path)
+{
+ const char *p;
+
+#ifdef _WIN32
+ if (is_windows_drive(path) ||
+ is_windows_drive_prefix(path)) {
+ return 0;
+ }
+ p = path + strcspn(path, ":/\\");
+#else
+ p = path + strcspn(path, ":/");
+#endif
+
+ return *p == ':';
+}
+
+int path_is_absolute(const char *path)
+{
+#ifdef _WIN32
+ /* specific case for names like: "\\.\d:" */
+ if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
+ return 1;
+ }
+ return (*path == '/' || *path == '\\');
+#else
+ return (*path == '/');
+#endif
+}
+
+/* If filename is absolute, just copy it to dest. Otherwise, build a
+ path to it by treating it as relative to base_path. URLs are
+ supported. */
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename)
+{
+ const char *p, *p1;
+ int len;
+
+ if (dest_size <= 0)
+ return;
+ if (path_is_absolute(filename)) {
+ pstrcpy(dest, dest_size, filename);
+ } else {
+ p = strchr(base_path, ':');
+ if (p)
+ p++;
+ else
+ p = base_path;
+ p1 = strrchr(base_path, '/');
+#ifdef _WIN32
+ {
+ const char *p2;
+ p2 = strrchr(base_path, '\\');
+ if (!p1 || p2 > p1)
+ p1 = p2;
+ }
+#endif
+ if (p1)
+ p1++;
+ else
+ p1 = base_path;
+ if (p1 > p)
+ p = p1;
+ len = p - base_path;
+ if (len > dest_size - 1)
+ len = dest_size - 1;
+ memcpy(dest, base_path, len);
+ dest[len] = '\0';
+ pstrcat(dest, dest_size, filename);
+ }
+}
+
+void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
+{
+ if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
+ pstrcpy(dest, sz, bs->backing_file);
+ } else {
+ path_combine(dest, sz, bs->filename, bs->backing_file);
+ }
+}
+
+void bdrv_register(BlockDriver *bdrv)
+{
+ /* Block drivers without coroutine functions need emulation */
+ if (!bdrv->bdrv_co_readv) {
+ bdrv->bdrv_co_readv = bdrv_co_readv_em;
+ bdrv->bdrv_co_writev = bdrv_co_writev_em;
+
+ /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
+ * the block driver lacks aio we need to emulate that too.
+ */
+ if (!bdrv->bdrv_aio_readv) {
+ /* add AIO emulation layer */
+ bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
+ bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
+ }
+ }
+
+ QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
+}
+
+/* create a new block device (by default it is empty) */
+BlockDriverState *bdrv_new(const char *device_name)
+{
+ BlockDriverState *bs;
+
+ bs = g_malloc0(sizeof(BlockDriverState));
+ pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
+ if (device_name[0] != '\0') {
+ QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
+ }
+ bdrv_iostatus_disable(bs);
+ notifier_list_init(&bs->close_notifiers);
+ notifier_with_return_list_init(&bs->before_write_notifiers);
+
+ return bs;
+}
+
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
+{
+ notifier_list_add(&bs->close_notifiers, notify);
+}
+
+BlockDriver *bdrv_find_format(const char *format_name)
+{
+ BlockDriver *drv1;
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (!strcmp(drv1->format_name, format_name)) {
+ return drv1;
+ }
+ }
+ return NULL;
+}
+
+static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
+{
+ static const char *whitelist_rw[] = {
+ CONFIG_BDRV_RW_WHITELIST
+ };
+ static const char *whitelist_ro[] = {
+ CONFIG_BDRV_RO_WHITELIST
+ };
+ const char **p;
+
+ if (!whitelist_rw[0] && !whitelist_ro[0]) {
+ return 1; /* no whitelist, anything goes */
+ }
+
+ for (p = whitelist_rw; *p; p++) {
+ if (!strcmp(drv->format_name, *p)) {
+ return 1;
+ }
+ }
+ if (read_only) {
+ for (p = whitelist_ro; *p; p++) {
+ if (!strcmp(drv->format_name, *p)) {
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
+ bool read_only)
+{
+ BlockDriver *drv = bdrv_find_format(format_name);
+ return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
+}
+
+typedef struct CreateCo {
+ BlockDriver *drv;
+ char *filename;
+ QEMUOptionParameter *options;
+ int ret;
+} CreateCo;
+
+static void coroutine_fn bdrv_create_co_entry(void *opaque)
+{
+ CreateCo *cco = opaque;
+ assert(cco->drv);
+
+ cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
+}
+
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QEMUOptionParameter *options)
+{
+ int ret;
+
+ Coroutine *co;
+ CreateCo cco = {
+ .drv = drv,
+ .filename = g_strdup(filename),
+ .options = options,
+ .ret = NOT_DONE,
+ };
+
+ if (!drv->bdrv_create) {
+ ret = -ENOTSUP;
+ goto out;
+ }
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_create_co_entry(&cco);
+ } else {
+ co = qemu_coroutine_create(bdrv_create_co_entry);
+ qemu_coroutine_enter(co, &cco);
+ while (cco.ret == NOT_DONE) {
+ qemu_aio_wait();
+ }
+ }
+
+ ret = cco.ret;
+
+out:
+ g_free(cco.filename);
+ return ret;
+}
+
+int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
+{
+ BlockDriver *drv;
+
+ drv = bdrv_find_protocol(filename, true);
+ if (drv == NULL) {
+ return -ENOENT;
+ }
+
+ return bdrv_create(drv, filename, options);
+}
+
+/*
+ * Create a uniquely-named empty temporary file.
+ * Return 0 upon success, otherwise a negative errno value.
+ */
+int get_tmp_filename(char *filename, int size)
+{
+#ifdef _WIN32
+ char temp_dir[MAX_PATH];
+ /* GetTempFileName requires that its output buffer (4th param)
+ have length MAX_PATH or greater. */
+ assert(size >= MAX_PATH);
+ return (GetTempPath(MAX_PATH, temp_dir)
+ && GetTempFileName(temp_dir, "qem", 0, filename)
+ ? 0 : -GetLastError());
+#else
+ int fd;
+ const char *tmpdir;
+ tmpdir = getenv("TMPDIR");
+ if (!tmpdir)
+ tmpdir = "/tmp";
+ if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
+ return -EOVERFLOW;
+ }
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ return -errno;
+ }
+ if (close(fd) != 0) {
+ unlink(filename);
+ return -errno;
+ }
+ return 0;
+#endif
+}
+
+/*
+ * Detect host devices. By convention, /dev/cdrom[N] is always
+ * recognized as a host CDROM.
+ */
+static BlockDriver *find_hdev_driver(const char *filename)
+{
+ int score_max = 0, score;
+ BlockDriver *drv = NULL, *d;
+
+ QLIST_FOREACH(d, &bdrv_drivers, list) {
+ if (d->bdrv_probe_device) {
+ score = d->bdrv_probe_device(filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = d;
+ }
+ }
+ }
+
+ return drv;
+}
+
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix)
+{
+ BlockDriver *drv1;
+ char protocol[128];
+ int len;
+ const char *p;
+
+ /* TODO Drivers without bdrv_file_open must be specified explicitly */
+
+ /*
+ * XXX(hch): we really should not let host device detection
+ * override an explicit protocol specification, but moving this
+ * later breaks access to device names with colons in them.
+ * Thanks to the brain-dead persistent naming schemes on udev-
+ * based Linux systems those actually are quite common.
+ */
+ drv1 = find_hdev_driver(filename);
+ if (drv1) {
+ return drv1;
+ }
+
+ if (!path_has_protocol(filename) || !allow_protocol_prefix) {
+ return bdrv_find_format("file");
+ }
+
+ p = strchr(filename, ':');
+ assert(p != NULL);
+ len = p - filename;
+ if (len > sizeof(protocol) - 1)
+ len = sizeof(protocol) - 1;
+ memcpy(protocol, filename, len);
+ protocol[len] = '\0';
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (drv1->protocol_name &&
+ !strcmp(drv1->protocol_name, protocol)) {
+ return drv1;
+ }
+ }
+ return NULL;
+}
+
+static int find_image_format(BlockDriverState *bs, const char *filename,
+ BlockDriver **pdrv)
+{
+ int score, score_max;
+ BlockDriver *drv1, *drv;
+ uint8_t buf[2048];
+ int ret = 0;
+
+ /* Return the raw BlockDriver * for scsi-generic devices or empty drives */
+ if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
+ drv = bdrv_find_format("raw");
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
+ }
+
+ ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+ if (ret < 0) {
+ *pdrv = NULL;
+ return ret;
+ }
+
+ score_max = 0;
+ drv = NULL;
+ QLIST_FOREACH(drv1, &bdrv_drivers, list) {
+ if (drv1->bdrv_probe) {
+ score = drv1->bdrv_probe(buf, ret, filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = drv1;
+ }
+ }
+ }
+ if (!drv) {
+ ret = -ENOENT;
+ }
+ *pdrv = drv;
+ return ret;
+}
+
+/**
+ * Set the current 'total_sectors' value
+ */
+static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
+{
+ BlockDriver *drv = bs->drv;
+
+ /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
+ if (bs->sg)
+ return 0;
+
+ /* query actual device if possible, otherwise just trust the hint */
+ if (drv->bdrv_getlength) {
+ int64_t length = drv->bdrv_getlength(bs);
+ if (length < 0) {
+ return length;
+ }
+ hint = length >> BDRV_SECTOR_BITS;
+ }
+
+ bs->total_sectors = hint;
+ return 0;
+}
+
+/**
+ * Set open flags for a given discard mode
+ *
+ * Return 0 on success, -1 if the discard mode was invalid.
+ */
+int bdrv_parse_discard_flags(const char *mode, int *flags)
+{
+ *flags &= ~BDRV_O_UNMAP;
+
+ if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
+ /* do nothing */
+ } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
+ *flags |= BDRV_O_UNMAP;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Set open flags for a given cache mode
+ *
+ * Return 0 on success, -1 if the cache mode was invalid.
+ */
+int bdrv_parse_cache_flags(const char *mode, int *flags)
+{
+ *flags &= ~BDRV_O_CACHE_MASK;
+
+ if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
+ *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
+ } else if (!strcmp(mode, "directsync")) {
+ *flags |= BDRV_O_NOCACHE;
+ } else if (!strcmp(mode, "writeback")) {
+ *flags |= BDRV_O_CACHE_WB;
+ } else if (!strcmp(mode, "unsafe")) {
+ *flags |= BDRV_O_CACHE_WB;
+ *flags |= BDRV_O_NO_FLUSH;
+ } else if (!strcmp(mode, "writethrough")) {
+ /* this is the default */
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * The copy-on-read flag is actually a reference count so multiple users may
+ * use the feature without worrying about clobbering its previous state.
+ * Copy-on-read stays enabled until all users have called to disable it.
+ */
+void bdrv_enable_copy_on_read(BlockDriverState *bs)
+{
+ bs->copy_on_read++;
+}
+
+void bdrv_disable_copy_on_read(BlockDriverState *bs)
+{
+ assert(bs->copy_on_read > 0);
+ bs->copy_on_read--;
+}
+
+static int bdrv_open_flags(BlockDriverState *bs, int flags)
+{
+ int open_flags = flags | BDRV_O_CACHE_WB;
+
+ /*
+ * Clear flags that are internal to the block layer before opening the
+ * image.
+ */
+ open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+ /*
+ * Snapshots should be writable.
+ */
+ if (bs->is_temporary) {
+ open_flags |= BDRV_O_RDWR;
+ }
+
+ return open_flags;
+}
+
+/*
+ * Common part for opening disk images and files
+ *
+ * Removes all processed options from *options.
+ */
+static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
+ QDict *options, int flags, BlockDriver *drv)
+{
+ int ret, open_flags;
+ const char *filename;
+
+ assert(drv != NULL);
+ assert(bs->file == NULL);
+ assert(options != NULL && bs->options != options);
+
+ if (file != NULL) {
+ filename = file->filename;
+ } else {
+ filename = qdict_get_try_str(options, "filename");
+ }
+
+ trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
+
+ /* bdrv_open() was called with a protocol driver directly as drv. This layer
+ * is already opened, so assign it to bs (while file becomes a closed
+ * BlockDriverState) and return immediately. */
+ if (file != NULL && drv->bdrv_file_open) {
+ bdrv_swap(file, bs);
+ return 0;
+ }
+
+ bs->open_flags = flags;
+ bs->buffer_alignment = 512;
+ open_flags = bdrv_open_flags(bs, flags);
+ bs->read_only = !(open_flags & BDRV_O_RDWR);
+
+ if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
+ return -ENOTSUP;
+ }
+
+ assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
+ if (!bs->read_only && (flags & BDRV_O_COPY_ON_READ)) {
+ bdrv_enable_copy_on_read(bs);
+ }
+
+ if (filename != NULL) {
+ pstrcpy(bs->filename, sizeof(bs->filename), filename);
+ } else {
+ bs->filename[0] = '\0';
+ }
+
+ bs->drv = drv;
+ bs->opaque = g_malloc0(drv->instance_size);
+
+ bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
+
+ /* Open the image, either directly or using a protocol */
+ if (drv->bdrv_file_open) {
+ assert(file == NULL);
+ assert(drv->bdrv_parse_filename || filename != NULL);
+ ret = drv->bdrv_file_open(bs, options, open_flags);
+ } else {
+ if (file == NULL) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't use '%s' as a "
+ "block driver for the protocol level",
+ drv->format_name);
+ ret = -EINVAL;
+ goto free_and_fail;
+ }
+ assert(file != NULL);
+ bs->file = file;
+ ret = drv->bdrv_open(bs, options, open_flags);
+ }
+
+ if (ret < 0) {
+ goto free_and_fail;
+ }
+
+ ret = refresh_total_sectors(bs, bs->total_sectors);
+ if (ret < 0) {
+ goto free_and_fail;
+ }
+
+#ifndef _WIN32
+ if (bs->is_temporary) {
+ assert(filename != NULL);
+ unlink(filename);
+ }
+#endif
+ return 0;
+
+free_and_fail:
+ bs->file = NULL;
+ g_free(bs->opaque);
+ bs->opaque = NULL;
+ bs->drv = NULL;
+ return ret;
+}
+
+/*
+ * Opens a file using a protocol (file, host_device, nbd, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_file_open.
+ */
+int bdrv_file_open(BlockDriverState **pbs, const char *filename,
+ QDict *options, int flags)
+{
+ BlockDriverState *bs;
+ BlockDriver *drv;
+ const char *drvname;
+ bool allow_protocol_prefix = false;
+ int ret;
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs = bdrv_new("");
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
+ /* Fetch the file name from the options QDict if necessary */
+ if (!filename) {
+ filename = qdict_get_try_str(options, "filename");
+ } else if (filename && !qdict_haskey(options, "filename")) {
+ qdict_put(options, "filename", qstring_from_str(filename));
+ allow_protocol_prefix = true;
+ } else {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Can't specify 'file' and "
+ "'filename' options at the same time");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* Find the right block driver */
+ drvname = qdict_get_try_str(options, "driver");
+ if (drvname) {
+ drv = bdrv_find_whitelisted_format(drvname, !(flags & BDRV_O_RDWR));
+ qdict_del(options, "driver");
+ } else if (filename) {
+ drv = bdrv_find_protocol(filename, allow_protocol_prefix);
+ if (!drv) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Unknown protocol");
+ }
+ } else {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "Must specify either driver or file");
+ drv = NULL;
+ }
+
+ if (!drv) {
+ ret = -ENOENT;
+ goto fail;
+ }
+
+ /* Parse the filename and open it */
+ if (drv->bdrv_parse_filename && filename) {
+ Error *local_err = NULL;
+ drv->bdrv_parse_filename(filename, options, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+ qdict_del(options, "filename");
+ } else if (!drv->bdrv_parse_filename && !filename) {
+ qerror_report(ERROR_CLASS_GENERIC_ERROR,
+ "The '%s' block driver requires a file name",
+ drv->format_name);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = bdrv_open_common(bs, NULL, options, flags, drv);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Check if any unknown options were used */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block protocol '%s' doesn't "
+ "support the option '%s'",
+ drv->format_name, entry->key);
+ ret = -EINVAL;
+ goto fail;
+ }
+ QDECREF(options);
+
+ bs->growable = 1;
+ *pbs = bs;
+ return 0;
+
+fail:
+ QDECREF(options);
+ if (!bs->drv) {
+ QDECREF(bs->options);
+ }
+ bdrv_delete(bs);
+ return ret;
+}
+
+/*
+ * Opens the backing file for a BlockDriverState if not yet open
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict is transferred to this
+ * function (even on failure), so if the caller intends to reuse the dictionary,
+ * it needs to use QINCREF() before calling bdrv_open_backing_file.
+ */
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *options)
+{
+ char backing_filename[PATH_MAX];
+ int back_flags, ret;
+ BlockDriver *back_drv = NULL;
+
+ if (bs->backing_hd != NULL) {
+ QDECREF(options);
+ return 0;
+ }
+
+ /* NULL means an empty set of options */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->open_flags &= ~BDRV_O_NO_BACKING;
+ if (qdict_haskey(options, "file.filename")) {
+ backing_filename[0] = '\0';
+ } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
+ QDECREF(options);
+ return 0;
+ }
+
+ bs->backing_hd = bdrv_new("");
+ bdrv_get_full_backing_filename(bs, backing_filename,
+ sizeof(backing_filename));
+
+ if (bs->backing_format[0] != '\0') {
+ back_drv = bdrv_find_format(bs->backing_format);
+ }
+
+ /* backing files always opened read-only */
+ back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
+
+ ret = bdrv_open(bs->backing_hd,
+ *backing_filename ? backing_filename : NULL, options,
+ back_flags, back_drv);
+ if (ret < 0) {
+ bdrv_delete(bs->backing_hd);
+ bs->backing_hd = NULL;
+ bs->open_flags |= BDRV_O_NO_BACKING;
+ return ret;
+ }
+ return 0;
+}
+
+static void extract_subqdict(QDict *src, QDict **dst, const char *start)
+{
+ const QDictEntry *entry, *next;
+ const char *p;
+
+ *dst = qdict_new();
+ entry = qdict_first(src);
+
+ while (entry != NULL) {
+ next = qdict_next(src, entry);
+ if (strstart(entry->key, start, &p)) {
+ qobject_incref(entry->value);
+ qdict_put_obj(*dst, p, entry->value);
+ qdict_del(src, entry->key);
+ }
+ entry = next;
+ }
+}
+
+/*
+ * Opens a disk image (raw, qcow2, vmdk, ...)
+ *
+ * options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use QINCREF() before calling bdrv_open.
+ *
+ * bs is the BlockDriverState the image is opened on.
+ * filename is the image to open; it must not be NULL when BDRV_O_SNAPSHOT is
+ * set (asserted below).
+ * flags are the BDRV_O_* open flags.
+ * drv is the format driver to use, or NULL to probe the format with
+ * find_image_format().
+ *
+ * With BDRV_O_SNAPSHOT a temporary qcow2 image backed by 'filename' is created
+ * and opened instead of 'filename'; driver-specific options are rejected in
+ * that mode. Unless BDRV_O_NO_BACKING is set, the backing file is opened with
+ * the "backing."-prefixed options. Any option still left in the working copy
+ * afterwards is reported as unsupported.
+ *
+ * Returns 0 on success. On failure the value of the failing step is returned
+ * (negative wherever this function tests 'ret < 0', -EINVAL for rejected
+ * options).
+ */
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv)
+{
+ int ret;
+ /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
+ char tmp_filename[PATH_MAX + 1];
+ BlockDriverState *file = NULL;
+ QDict *file_options = NULL;
+
+ /* NULL means an empty set of options; allocate one so that the code
+ * below never has to special-case it */
+ if (options == NULL) {
+ options = qdict_new();
+ }
+
+ bs->options = options; /* bs keeps the reference passed in by the caller */
+ options = qdict_clone_shallow(options); /* working copy, checked for leftovers below */
+
+ /* For snapshot=on, create a temporary qcow2 overlay */
+ if (flags & BDRV_O_SNAPSHOT) {
+ BlockDriverState *bs1;
+ int64_t total_size;
+ BlockDriver *bdrv_qcow2;
+ QEMUOptionParameter *create_options;
+ char backing_filename[PATH_MAX];
+
+ if (qdict_size(options) != 0) {
+ error_report("Can't use snapshot=on with driver-specific options");
+ ret = -EINVAL;
+ goto fail;
+ }
+ assert(filename != NULL);
+
+ /* if snapshot, we create a temporary backing file and open it
+ instead of opening 'filename' directly */
+
+ /* open 'filename' once, only to learn its size, then close it again */
+ bs1 = bdrv_new("");
+ ret = bdrv_open(bs1, filename, NULL, 0, drv);
+ if (ret < 0) {
+ bdrv_delete(bs1);
+ goto fail;
+ }
+ /* NOTE(review): the result of bdrv_getlength() is not checked for an
+ * error (negative) value before it is masked - confirm */
+ total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
+
+ bdrv_delete(bs1);
+
+ ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Real path is meaningless for protocols */
+ if (path_has_protocol(filename)) {
+ snprintf(backing_filename, sizeof(backing_filename),
+ "%s", filename);
+ } else if (!realpath(filename, backing_filename)) {
+ ret = -errno;
+ goto fail;
+ }
+
+ /* create the overlay: same size as the original, backed by it */
+ bdrv_qcow2 = bdrv_find_format("qcow2");
+ create_options = parse_option_parameters("", bdrv_qcow2->create_options,
+ NULL);
+
+ set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size);
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FILE,
+ backing_filename);
+ if (drv) {
+ set_option_parameter(create_options, BLOCK_OPT_BACKING_FMT,
+ drv->format_name);
+ }
+
+ ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options);
+ free_option_parameters(create_options);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* from here on the temporary overlay is what gets opened */
+ filename = tmp_filename;
+ drv = bdrv_qcow2;
+ bs->is_temporary = 1;
+ }
+
+ /* Open image file without format layer */
+ if (flags & BDRV_O_RDWR) {
+ flags |= BDRV_O_ALLOW_RDWR;
+ }
+
+ /* options prefixed with "file." are moved into file_options */
+ extract_subqdict(options, &file_options, "file.");
+
+ ret = bdrv_file_open(&file, filename, file_options,
+ bdrv_open_flags(bs, flags | BDRV_O_UNMAP));
+ /* NOTE(review): this failure takes the 'fail' path, which does not unlink
+ * a temporary overlay created above - confirm whether that is intended */
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Find the right image format driver */
+ if (!drv) {
+ ret = find_image_format(file, filename, &drv);
+ }
+
+ /* still no driver: fail with the value find_image_format() returned */
+ if (!drv) {
+ goto unlink_and_fail;
+ }
+
+ /* Open the image */
+ ret = bdrv_open_common(bs, file, options, flags, drv);
+ if (ret < 0) {
+ goto unlink_and_fail;
+ }
+
+ /* bs->file does not refer to 'file', so our handle is deleted here */
+ if (bs->file != file) {
+ bdrv_delete(file);
+ file = NULL;
+ }
+
+ /* If there is a backing file, use it */
+ if ((flags & BDRV_O_NO_BACKING) == 0) {
+ QDict *backing_options;
+
+ extract_subqdict(options, &backing_options, "backing.");
+ ret = bdrv_open_backing_file(bs, backing_options);
+ if (ret < 0) {
+ goto close_and_fail;
+ }
+ }
+
+ /* Check if any unknown options were used: everything understood so far
+ * has been removed from the working copy, so leftovers are errors */
+ if (qdict_size(options) != 0) {
+ const QDictEntry *entry = qdict_first(options);
+ qerror_report(ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by "
+ "device '%s' doesn't support the option '%s'",
+ drv->format_name, bs->device_name, entry->key);
+
+ ret = -EINVAL;
+ goto close_and_fail;
+ }
+ QDECREF(options); /* the working copy is no longer needed */
+
+ if (!bdrv_key_required(bs)) {
+ bdrv_dev_change_media_cb(bs, true);
+ }
+
+ /* throttling disk I/O limits */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_enable(bs);
+ }
+
+ return 0;
+
+/* failure after 'file' was opened: delete it, remove a temporary overlay,
+ * then fall through into 'fail' */
+unlink_and_fail:
+ if (file != NULL) {
+ bdrv_delete(file);
+ }
+ if (bs->is_temporary) {
+ unlink(filename);
+ }
+/* common failure exit: drop both the reference held by bs and the copy */
+fail:
+ QDECREF(bs->options);
+ QDECREF(options);
+ bs->options = NULL;
+ return ret;
+
+/* failure after bdrv_open_common() succeeded: bdrv_close() undoes the open;
+ * only the working copy is released here */
+close_and_fail:
+ bdrv_close(bs);
+ QDECREF(options);
+ return ret;
+}
+
+typedef struct BlockReopenQueueEntry {
+ bool prepared; /* set once bdrv_reopen_prepare() succeeded for this entry */
+ BDRVReopenState state; /* carries the bs to reopen and the requested flags */
+ QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry; /* linkage inside the reopen queue */
+} BlockReopenQueueEntry;
+
+/*
+ * Append a BlockDriverState to a reopen queue so that several devices can be
+ * reopened as one atomic, transactional set.
+ *
+ * bs_queue is either a BlockReopenQueue that has already been initialised
+ * with QSIMPLEQ_INIT, or NULL, in which case a new queue is allocated and
+ * initialised here. Pass the returned queue back in on subsequent calls that
+ * belong to the same atomic set.
+ *
+ * bs is the BlockDriverState to add; it must not be NULL. If bs has a
+ * bs->file, that one is queued first, so it ends up ahead of bs.
+ *
+ * flags are the open flags to reopen bs (and bs->file) with.
+ *
+ * Returns the queue in use: the one passed in, or the newly allocated one.
+ */
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                    BlockDriverState *bs, int flags)
+{
+    BlockReopenQueueEntry *new_entry;
+
+    assert(bs != NULL);
+
+    if (!bs_queue) {
+        bs_queue = g_new0(BlockReopenQueue, 1);
+        QSIMPLEQ_INIT(bs_queue);
+    }
+
+    /* recurse into the underlying file before queueing bs itself */
+    if (bs->file != NULL) {
+        bdrv_reopen_queue(bs_queue, bs->file, flags);
+    }
+
+    new_entry = g_new0(BlockReopenQueueEntry, 1);
+    new_entry->state.bs = bs;
+    new_entry->state.flags = flags;
+    QSIMPLEQ_INSERT_TAIL(bs_queue, new_entry, entry);
+
+    return bs_queue;
+}
+
+/*
+ * Reopen several BlockDriverStates atomically and transactionally.
+ *
+ * bs_queue must have been built up previously with bdrv_reopen_queue().
+ *
+ * Every entry is prepared first. If preparing any entry fails, the error is
+ * propagated to errp, the entries that were already prepared are aborted and
+ * -1 is returned. If every entry prepares successfully, all of them are
+ * committed and 0 is returned.
+ *
+ * The queue and all of its entries are freed before returning, on success
+ * and on failure alike.
+ */
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+{
+    BlockReopenQueueEntry *cur, *following;
+    Error *prepare_err = NULL;
+    bool all_prepared = true;
+    int ret;
+
+    assert(bs_queue != NULL);
+
+    bdrv_drain_all();
+
+    /* phase 1: stage the change on every device */
+    QSIMPLEQ_FOREACH(cur, bs_queue, entry) {
+        if (bdrv_reopen_prepare(&cur->state, bs_queue, &prepare_err) != 0) {
+            error_propagate(errp, prepare_err);
+            all_prepared = false;
+            break;
+        }
+        cur->prepared = true;
+    }
+
+    /* phase 2: apply the staged changes, but only if nothing failed */
+    if (all_prepared) {
+        QSIMPLEQ_FOREACH(cur, bs_queue, entry) {
+            bdrv_reopen_commit(&cur->state);
+        }
+        ret = 0;
+    } else {
+        ret = -1;
+    }
+
+    /* roll back what was prepared on failure, and release the queue */
+    QSIMPLEQ_FOREACH_SAFE(cur, bs_queue, entry, following) {
+        if (ret != 0 && cur->prepared) {
+            bdrv_reopen_abort(&cur->state);
+        }
+        g_free(cur);
+    }
+    g_free(bs_queue);
+    return ret;
+}
+
+
+/*
+ * Reopen a single BlockDriverState with the specified flags.
+ *
+ * Builds a fresh reopen queue for bs and hands it to bdrv_reopen_multiple(),
+ * which also frees it. An error reported by that call is propagated to errp.
+ *
+ * Returns the result of bdrv_reopen_multiple().
+ */
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
+{
+    Error *err = NULL;
+    BlockReopenQueue *single = bdrv_reopen_queue(NULL, bs, bdrv_flags);
+    int rc = bdrv_reopen_multiple(single, &err);
+
+    if (err) {
+        error_propagate(errp, err);
+    }
+    return rc;
+}
+
+
+/*
+ * Stage the reopen of one BlockDriverState. The staged changes live in the
+ * 'opaque' field of the BDRVReopenState, which is used and allocated by the
+ * block driver's .bdrv_reopen_prepare().
+ *
+ * reopen_state carries the BlockDriverState (->bs) and the new flags (->flags)
+ * queue is the reopen queue; it is handed to the driver callback
+ * errp receives the error on failure
+ *
+ * Returns 0 on success, non-zero on error:
+ *  - -1 if BDRV_O_RDWR is requested but the image was opened without
+ *    BDRV_O_ALLOW_RDWR
+ *  - the return value of bdrv_flush() if flushing fails
+ *  - the return value of the driver callback if it fails
+ *  - -1 if the driver has no .bdrv_reopen_prepare() (currently mandatory)
+ *
+ * It is the responsibility of the caller to call abort() or commit() for any
+ * other BDS that have been left in a prepare() state.
+ * NOTE(review): the previous comment said bdrv_reopen_abort() is called on
+ * failure; nothing in this function does that - confirm against the drivers.
+ */
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+                        Error **errp)
+{
+    Error *drv_err = NULL;
+    BlockDriverState *bs;
+    BlockDriver *drv;
+    int rc;
+
+    assert(reopen_state != NULL);
+    assert(reopen_state->bs->drv != NULL);
+    bs = reopen_state->bs;
+    drv = bs->drv;
+
+    /* an image that must stay read-only cannot be switched to r/w */
+    if ((reopen_state->flags & BDRV_O_RDWR) &&
+        !(bs->open_flags & BDRV_O_ALLOW_RDWR)) {
+        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
+                  bs->device_name);
+        return -1;
+    }
+
+    rc = bdrv_flush(bs);
+    if (rc != 0) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
+                  strerror(-rc));
+        return rc;
+    }
+
+    /* every supported driver must provide a prepare handler */
+    if (drv->bdrv_reopen_prepare == NULL) {
+        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+                  drv->format_name, bs->device_name,
+                  "reopening of file");
+        return -1;
+    }
+
+    rc = drv->bdrv_reopen_prepare(reopen_state, queue, &drv_err);
+    if (rc == 0) {
+        return 0;
+    }
+
+    /* prefer the driver's own error; otherwise report a generic one */
+    if (drv_err != NULL) {
+        error_propagate(errp, drv_err);
+    } else {
+        error_setg(errp, "failed while preparing to reopen image '%s'",
+                   reopen_state->bs->filename);
+    }
+    return rc;
+}
+
+/*
+ * Make the changes staged by bdrv_reopen_prepare() final.
+ *
+ * The driver's .bdrv_reopen_commit(), if present, is called first. After
+ * that the new flags are stored in the BlockDriverState, and
+ * enable_write_cache (from BDRV_O_CACHE_WB) and read_only (from BDRV_O_RDWR)
+ * are derived from them.
+ */
+void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+{
+    BlockDriverState *bs;
+    BlockDriver *drv;
+    int new_flags;
+
+    assert(reopen_state != NULL);
+    drv = reopen_state->bs->drv;
+    assert(drv != NULL);
+
+    /* driver-level actions, if any */
+    if (drv->bdrv_reopen_commit != NULL) {
+        drv->bdrv_reopen_commit(reopen_state);
+    }
+
+    /* BDS-level flags are only updated after the driver had its turn */
+    bs = reopen_state->bs;
+    new_flags = reopen_state->flags;
+
+    bs->open_flags = new_flags;
+    bs->enable_write_cache = (new_flags & BDRV_O_CACHE_WB) != 0;
+    bs->read_only = (new_flags & BDRV_O_RDWR) == 0;
+}
+
+/*
+ * Abort a prepared reopen: give the driver's .bdrv_reopen_abort(), if it has
+ * one, the chance to delete and free whatever it staged in reopen_state.
+ */
+void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+{
+    BlockDriver *drv;
+
+    assert(reopen_state != NULL);
+    drv = reopen_state->bs->drv;
+    assert(drv != NULL);
+
+    if (drv->bdrv_reopen_abort == NULL) {
+        return;
+    }
+    drv->bdrv_reopen_abort(reopen_state);
+}
+
+
+/*
+ * Close the image opened on bs.
+ *
+ * A running block job is cancelled synchronously, outstanding I/O is drained
+ * and flushed, and the close notifiers are run. If a driver is attached, the
+ * backing file is deleted, the driver's bdrv_close() is called, the driver
+ * state is freed and the per-image fields are reset; bs->file is deleted
+ * last. Finally the attached device is told that the medium is gone and I/O
+ * throttling is disabled if it was enabled.
+ *
+ * bs itself is not freed here.
+ */
+void bdrv_close(BlockDriverState *bs)
+{
+ if (bs->job) {
+ block_job_cancel_sync(bs->job);
+ }
+ bdrv_drain_all(); /* complete I/O */
+ bdrv_flush(bs);
+ bdrv_drain_all(); /* in case flush left pending I/O */
+ notifier_list_notify(&bs->close_notifiers, bs);
+
+ if (bs->drv) {
+ /* the backing file goes first, before the driver is shut down */
+ if (bs->backing_hd) {
+ bdrv_delete(bs->backing_hd);
+ bs->backing_hd = NULL;
+ }
+ bs->drv->bdrv_close(bs);
+ g_free(bs->opaque);
+#ifdef _WIN32
+ /* only on Windows is a temporary image unlinked at close time */
+ if (bs->is_temporary) {
+ unlink(bs->filename);
+ }
+#endif
+ /* reset the per-image state so that bs looks unopened again */
+ bs->opaque = NULL;
+ bs->drv = NULL;
+ bs->copy_on_read = 0;
+ bs->backing_file[0] = '\0';
+ bs->backing_format[0] = '\0';
+ bs->total_sectors = 0;
+ bs->encrypted = 0;
+ bs->valid_key = 0;
+ bs->sg = 0;
+ bs->growable = 0;
+ QDECREF(bs->options);
+ bs->options = NULL;
+
+ if (bs->file != NULL) {
+ bdrv_delete(bs->file);
+ bs->file = NULL;
+ }
+ }
+
+ /* tell the attached device model (if any) that the medium was removed */
+ bdrv_dev_change_media_cb(bs, false);
+
+ /*throttling disk I/O limits*/
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_disable(bs);
+ }
+}
+
+/*
+ * Close every BlockDriverState on the global bdrv_states list. The states
+ * stay on the list; only bdrv_close() is applied to each of them.
+ */
+void bdrv_close_all(void)
+{
+    BlockDriverState *cur;
+
+    QTAILQ_FOREACH(cur, &bdrv_states, list) {
+        bdrv_close(cur);
+    }
+}
+
+/*
+ * Wait for pending requests to complete across all BlockDriverStates.
+ *
+ * This function does not flush data to disk; use bdrv_flush_all() for that
+ * after calling this function.
+ *
+ * Completion of one asynchronous I/O operation can trigger any number of
+ * further I/O operations on other devices - a coroutine can be arbitrarily
+ * complex, and I/O can keep coming until it is complete. For that reason
+ * there is no function to drain a single device's I/O queue.
+ */
+void bdrv_drain_all(void)
+{
+    BlockDriverState *bs;
+    bool pending;
+
+    for (;;) {
+        pending = qemu_aio_wait();
+
+        /* FIXME: We do not have timer support here, so this is effectively
+         * a busy wait: throttled requests are simply restarted.
+         */
+        QTAILQ_FOREACH(bs, &bdrv_states, list) {
+            if (qemu_co_queue_empty(&bs->throttled_reqs)) {
+                continue;
+            }
+            qemu_co_queue_restart_all(&bs->throttled_reqs);
+            pending = true;
+        }
+
+        if (!pending) {
+            break;
+        }
+    }
+
+    /* Requests that are still pending at this point indicate a bug */
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        assert(QLIST_EMPTY(&bs->tracked_requests));
+        assert(qemu_co_queue_empty(&bs->throttled_reqs));
+    }
+}
+
+/*
+ * Make a BlockDriverState anonymous by removing it from the bdrv_states
+ * list. The device name is emptied as well, which is what prevents a second
+ * call from removing the state again.
+ */
+void bdrv_make_anon(BlockDriverState *bs)
+{
+    /* an empty name means the state is not on the list: nothing to do */
+    if (bs->device_name[0] == '\0') {
+        return;
+    }
+
+    QTAILQ_REMOVE(&bdrv_states, bs, list);
+    bs->device_name[0] = '\0';
+}
+
+/*
+ * Invoke the driver's optional bdrv_rebind() callback for bs. Does nothing
+ * when bs has no driver or the driver has no such callback.
+ */
+static void bdrv_rebind(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+
+    if (drv == NULL || drv->bdrv_rebind == NULL) {
+        return;
+    }
+    drv->bdrv_rebind(bs);
+}
+
+/*
+ * Copy from bs_src to bs_dest the fields that belong to the attached device
+ * rather than to the image contents. bs_src is left unmodified.
+ */
+static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
+ BlockDriverState *bs_src)
+{
+ /* move some fields that need to stay attached to the device */
+ bs_dest->open_flags = bs_src->open_flags;
+
+ /* dev info */
+ bs_dest->dev_ops = bs_src->dev_ops;
+ bs_dest->dev_opaque = bs_src->dev_opaque;
+ bs_dest->dev = bs_src->dev;
+ bs_dest->buffer_alignment = bs_src->buffer_alignment;
+ bs_dest->copy_on_read = bs_src->copy_on_read;
+
+ bs_dest->enable_write_cache = bs_src->enable_write_cache;
+
+ /* i/o timing parameters */
+ bs_dest->slice_start = bs_src->slice_start;
+ bs_dest->slice_end = bs_src->slice_end;
+ bs_dest->slice_submitted = bs_src->slice_submitted;
+ bs_dest->io_limits = bs_src->io_limits;
+ bs_dest->throttled_reqs = bs_src->throttled_reqs;
+ bs_dest->block_timer = bs_src->block_timer;
+ bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
+
+ /* r/w error */
+ bs_dest->on_read_error = bs_src->on_read_error;
+ bs_dest->on_write_error = bs_src->on_write_error;
+
+ /* i/o status */
+ bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
+ bs_dest->iostatus = bs_src->iostatus;
+
+ /* dirty bitmap */
+ bs_dest->dirty_bitmap = bs_src->dirty_bitmap;
+
+ /* job */
+ bs_dest->in_use = bs_src->in_use;
+ bs_dest->job = bs_src->job;
+
+ /* keep the same entry in bdrv_states: both the name and the list
+ * linkage are copied */
+ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
+ bs_src->device_name);
+ bs_dest->list = bs_src->list;
+}
+
+/*
+ * Swap bs contents for two image chains while they are live,
+ * while keeping required fields on the BlockDriverState that is
+ * actually attached to a device.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_old. Both bs_new and bs_old are modified.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ *
+ * The two structures are first exchanged wholesale. The device-related
+ * fields (see bdrv_move_feature_fields()) are then exchanged a second time,
+ * so that each of them ends up on the BlockDriverState it started on.
+ * Finally the driver's rebind callback is run on both states.
+ */
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
+{
+ BlockDriverState tmp;
+
+ /* bs_new must be anonymous and shouldn't have anything fancy enabled */
+ assert(bs_new->device_name[0] == '\0');
+ assert(bs_new->dirty_bitmap == NULL);
+ assert(bs_new->job == NULL);
+ assert(bs_new->dev == NULL);
+ assert(bs_new->in_use == 0);
+ assert(bs_new->io_limits_enabled == false);
+ assert(bs_new->block_timer == NULL);
+
+ /* exchange the complete structures */
+ tmp = *bs_new;
+ *bs_new = *bs_old;
+ *bs_old = tmp;
+
+ /* there are some fields that should not be swapped, move them back:
+ * a three-way exchange of the feature fields through tmp */
+ bdrv_move_feature_fields(&tmp, bs_old);
+ bdrv_move_feature_fields(bs_old, bs_new);
+ bdrv_move_feature_fields(bs_new, &tmp);
+
+ /* bs_new shouldn't be in bdrv_states even after the swap! */
+ assert(bs_new->device_name[0] == '\0');
+
+ /* Check a few fields that should remain attached to the device */
+ assert(bs_new->dev == NULL);
+ assert(bs_new->job == NULL);
+ assert(bs_new->in_use == 0);
+ assert(bs_new->io_limits_enabled == false);
+ assert(bs_new->block_timer == NULL);
+
+ bdrv_rebind(bs_new);
+ bdrv_rebind(bs_old);
+}
+
+/*
+ * Put new bs contents on top of a live image chain while the fields that
+ * must stay on the top layer are kept there.
+ *
+ * The contents of bs_new and bs_top are swapped with bdrv_swap(), so both
+ * are modified. Afterwards bs_top uses bs_new as its backing file: the
+ * backing_hd pointer, the backing file name and the backing format are set
+ * accordingly and BDRV_O_NO_BACKING is cleared from bs_top's open flags.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+{
+    const char *backing_fmt;
+
+    bdrv_swap(bs_new, bs_top);
+
+    /* After the swap, bs_new holds what used to be the top of the chain;
+     * hook it in below bs_top. */
+    bs_top->backing_hd = bs_new;
+    bs_top->open_flags &= ~BDRV_O_NO_BACKING;
+
+    pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file),
+            bs_new->filename);
+
+    backing_fmt = bs_new->drv ? bs_new->drv->format_name : "";
+    pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format),
+            backing_fmt);
+}
+
+/*
+ * Destroy a BlockDriverState: take it off the bdrv_states list if it is on
+ * it, close it and free it.
+ *
+ * bs must have no device attached, no block job and must not be in use.
+ */
+void bdrv_delete(BlockDriverState *bs)
+{
+    assert(bs->dev == NULL);
+    assert(bs->job == NULL);
+    assert(bs->in_use == 0);
+
+    /* unlink from the global list before tearing the state down */
+    bdrv_make_anon(bs);
+    bdrv_close(bs);
+
+    g_free(bs);
+}
+
+/*
+ * Attach the device model 'dev' to bs and reset the I/O status of bs.
+ *
+ * Returns 0 on success, -EBUSY if another device is already attached.
+ *
+ * TODO change to DeviceState *dev when all users are qdevified
+ */
+int bdrv_attach_dev(BlockDriverState *bs, void *dev)
+{
+    if (bs->dev != NULL) {
+        return -EBUSY;
+    }
+
+    bs->dev = dev;
+    bdrv_iostatus_reset(bs);
+    return 0;
+}
+
+/*
+ * Like bdrv_attach_dev(), but aborts the process when attaching fails.
+ *
+ * TODO qdevified devices don't use this, remove when devices are qdevified
+ */
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
+{
+    int rc = bdrv_attach_dev(bs, dev);
+
+    if (rc < 0) {
+        abort();
+    }
+}
+
+/*
+ * Detach 'dev' from bs. 'dev' must be the device that is currently attached.
+ * The device callbacks and their opaque pointer are cleared and the buffer
+ * alignment is set back to 512.
+ *
+ * TODO change to DeviceState *dev when all users are qdevified
+ */
+void bdrv_detach_dev(BlockDriverState *bs, void *dev)
+{
+    assert(bs->dev == dev);
+
+    bs->dev_ops = NULL;
+    bs->dev_opaque = NULL;
+    bs->dev = NULL;
+    bs->buffer_alignment = 512;
+}
+
+/*
+ * Return the device model attached to bs (the bs->dev pointer as is).
+ *
+ * TODO change to return DeviceState * when all users are qdevified
+ */
+void *bdrv_get_attached_dev(BlockDriverState *bs)
+{
+    void *attached = bs->dev;
+
+    return attached;
+}
+
+/*
+ * Install the device callback table 'ops' on bs, together with the opaque
+ * pointer that is passed to those callbacks.
+ */
+void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
+                      void *opaque)
+{
+    bs->dev_opaque = opaque;
+    bs->dev_ops = ops;
+}
+
+/*
+ * Emit the monitor event 'ev' for an I/O error on bdrv.
+ *
+ * The event data carries the device name, the action taken ("report",
+ * "ignore" or "stop") and the failed operation ("read" or "write").
+ * Any other value of 'action' aborts the process.
+ */
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+                               enum MonitorEvent ev,
+                               BlockErrorAction action, bool is_read)
+{
+    const char *action_str;
+    const char *operation_str;
+    QObject *data;
+
+    if (action == BDRV_ACTION_REPORT) {
+        action_str = "report";
+    } else if (action == BDRV_ACTION_IGNORE) {
+        action_str = "ignore";
+    } else if (action == BDRV_ACTION_STOP) {
+        action_str = "stop";
+    } else {
+        abort();
+    }
+
+    operation_str = is_read ? "read" : "write";
+
+    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
+                              bdrv->device_name,
+                              action_str,
+                              operation_str);
+    monitor_protocol_event(ev, data);
+
+    /* drop our reference to the event data */
+    qobject_decref(data);
+}
+
+/*
+ * Emit QEVENT_DEVICE_TRAY_MOVED for bs. 'ejected' is reported as the
+ * 'tray-open' member of the event data.
+ */
+static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
+{
+    QObject *event_data;
+
+    event_data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
+                                    bdrv_get_device_name(bs), ejected);
+
+    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, event_data);
+    qobject_decref(event_data);
+}
+
+/*
+ * Tell the attached device model that the medium changed.
+ *
+ * Does nothing unless the device registered a change_media_cb. Otherwise the
+ * callback is invoked with 'load', followed by up to two tray events: a
+ * "tray open" event if the tray was closed before the callback ran, and a
+ * "tray close" event if a medium was loaded.
+ */
+static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
+{
+    bool was_closed;
+
+    if (!bs->dev_ops || !bs->dev_ops->change_media_cb) {
+        return;
+    }
+
+    /* sample the tray state before the device gets to change it */
+    was_closed = !bdrv_dev_is_tray_open(bs);
+    bs->dev_ops->change_media_cb(bs->dev_opaque, load);
+
+    if (was_closed) {
+        /* tray open */
+        bdrv_emit_qmp_eject_event(bs, true);
+    }
+    if (load) {
+        /* tray close */
+        bdrv_emit_qmp_eject_event(bs, false);
+    }
+}
+
+/*
+ * Returns true if no device is attached to bs, or if the attached device
+ * registered a change_media_cb; false otherwise.
+ */
+bool bdrv_dev_has_removable_media(BlockDriverState *bs)
+{
+    if (bs->dev == NULL) {
+        return true;
+    }
+    return bs->dev_ops != NULL && bs->dev_ops->change_media_cb != NULL;
+}
+
+/*
+ * Forward an eject request to the attached device model. 'force' is passed
+ * through to the device's eject_request_cb; nothing happens if the device
+ * did not register that callback.
+ */
+void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
+{
+    const BlockDevOps *ops = bs->dev_ops;
+
+    if (ops == NULL || ops->eject_request_cb == NULL) {
+        return;
+    }
+    ops->eject_request_cb(bs->dev_opaque, force);
+}
+
+/*
+ * Ask the attached device model whether its tray is open. Returns false
+ * when the device did not register an is_tray_open callback.
+ */
+bool bdrv_dev_is_tray_open(BlockDriverState *bs)
+{
+    const BlockDevOps *ops = bs->dev_ops;
+
+    if (ops == NULL || ops->is_tray_open == NULL) {
+        return false;
+    }
+    return ops->is_tray_open(bs->dev_opaque);
+}
+
+/*
+ * Invoke the attached device model's resize_cb, if it registered one.
+ */
+static void bdrv_dev_resize_cb(BlockDriverState *bs)
+{
+    const BlockDevOps *ops = bs->dev_ops;
+
+    if (ops == NULL || ops->resize_cb == NULL) {
+        return;
+    }
+    ops->resize_cb(bs->dev_opaque);
+}
+
+/*
+ * Ask the attached device model whether the medium is locked. Returns false
+ * when the device did not register an is_medium_locked callback.
+ */
+bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
+{
+    const BlockDevOps *ops = bs->dev_ops;
+
+    if (ops == NULL || ops->is_medium_locked == NULL) {
+        return false;
+    }
+    return ops->is_medium_locked(bs->dev_opaque);
+}
+
+/*
+ * Run consistency checks on an image
+ *
+ * bs is the image to check, res receives the results and fix is handed to
+ * the driver's bdrv_check() unchanged.
+ *
+ * Returns 0 if the check could be completed (it doesn't mean that the image is
+ * free of errors) or -errno when an internal error occurred:
+ *  -ENOMEDIUM  bs has no driver (no image is open on it)
+ *  -ENOTSUP    the driver does not implement bdrv_check()
+ * The results of the check are stored in res, which is zeroed before the
+ * driver is called. res is left untouched on the two errors above.
+ */
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
+{
+    /* bs->drv is NULL when nothing is open (bdrv_close() resets it), so it
+     * must be checked before it is dereferenced */
+    if (bs->drv == NULL) {
+        return -ENOMEDIUM;
+    }
+    if (bs->drv->bdrv_check == NULL) {
+        return -ENOTSUP;
+    }
+
+    memset(res, 0, sizeof(*res));
+    return bs->drv->bdrv_check(bs, res, fix);
+}
+
+#define COMMIT_BUF_SECTORS 2048
+
+/*
+ * Commit COW file into the raw image: every allocated range of bs is copied
+ * into its backing file, then bs is emptied if the driver supports that.
+ *
+ * A read-only backing file is reopened read-write for the duration of the
+ * copy and switched back before returning (errors of that second reopen are
+ * ignored).
+ *
+ * Returns 0 on success, or
+ *  -ENOMEDIUM  bs has no driver
+ *  -ENOTSUP    bs has no backing file
+ *  -EBUSY      bs or its backing file is in use
+ *  -EACCES     the backing file could not be reopened read-write
+ *  -EIO        reading from bs or writing to the backing file failed
+ *  the negative value returned by bdrv_getlength() if the size of bs cannot
+ *  be determined, or the result of the driver's bdrv_make_empty().
+ */
+int bdrv_commit(BlockDriverState *bs)
+{
+    BlockDriver *drv = bs->drv;
+    int64_t sector, total_sectors, length;
+    int n, ro, open_flags;
+    int ret = 0;
+    uint8_t *buf = NULL;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+
+    if (!bs->backing_hd) {
+        return -ENOTSUP;
+    }
+
+    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
+        return -EBUSY;
+    }
+
+    ro = bs->backing_hd->read_only;
+    open_flags = bs->backing_hd->open_flags;
+
+    if (ro) {
+        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
+            return -EACCES;
+        }
+    }
+
+    /* A negative length is an error. It must not be mistaken for "nothing
+     * to copy", because the overlay would then be emptied below without its
+     * data ever having reached the backing file. */
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        ret = length;
+        goto ro_cleanup;
+    }
+
+    total_sectors = length >> BDRV_SECTOR_BITS;
+    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+
+    /* bdrv_is_allocated() stores in n how many sectors share the state it
+     * returns, so the loop advances by that amount in either case */
+    for (sector = 0; sector < total_sectors; sector += n) {
+        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
+
+            if (bdrv_read(bs, sector, buf, n) != 0) {
+                ret = -EIO;
+                goto ro_cleanup;
+            }
+
+            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
+                ret = -EIO;
+                goto ro_cleanup;
+            }
+        }
+    }
+
+    if (drv->bdrv_make_empty) {
+        ret = drv->bdrv_make_empty(bs);
+        bdrv_flush(bs);
+    }
+
+    /*
+     * Make sure all data we wrote to the backing device is actually
+     * stable on disk.
+     */
+    if (bs->backing_hd) {
+        bdrv_flush(bs->backing_hd);
+    }
+
+ro_cleanup:
+    g_free(buf); /* buf is still NULL when the length query failed */
+
+    if (ro) {
+        /* ignoring error return here */
+        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
+    }
+
+    return ret;
+}
+
+/*
+ * Run bdrv_commit() on every BlockDriverState in bdrv_states that has both a
+ * driver and a backing file.
+ *
+ * Stops at the first failure and returns its (negative) error code; returns
+ * 0 if every commit succeeded.
+ */
+int bdrv_commit_all(void)
+{
+    BlockDriverState *cur;
+    int rc;
+
+    QTAILQ_FOREACH(cur, &bdrv_states, list) {
+        if (!cur->drv || !cur->backing_hd) {
+            continue;
+        }
+        rc = bdrv_commit(cur);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+    return 0;
+}
+
+/**
+ * Finish a tracked request
+ *
+ * Call this when a tracked request is completing: the request is unlinked
+ * from the list it is on, and everything queued on its wait_queue is
+ * restarted.
+ */
+static void tracked_request_end(BdrvTrackedRequest *req)
+{
+    /* unlink first, then restart whoever was waiting on this request */
+    QLIST_REMOVE(req, list);
+    qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+/**
+ * Start tracking a request
+ *
+ * *req is (re)initialised from the arguments, with the calling coroutine
+ * recorded as its owner, and inserted at the head of bs->tracked_requests.
+ */
+static void tracked_request_begin(BdrvTrackedRequest *req,
+                                  BlockDriverState *bs,
+                                  int64_t sector_num,
+                                  int nb_sectors, bool is_write)
+{
+    /* members that are not named here are zero-initialised */
+    BdrvTrackedRequest fresh = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .is_write = is_write,
+        .co = qemu_coroutine_self(),
+    };
+
+    *req = fresh;
+    qemu_co_queue_init(&req->wait_queue);
+
+    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+}
+
+/**
+ * Round a region to cluster boundaries
+ *
+ * The start is rounded down and the length rounded up so that the result
+ * covers whole clusters. If the cluster size is unknown (bdrv_get_info()
+ * fails or reports 0), the region is returned unchanged.
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+                            int64_t sector_num, int nb_sectors,
+                            int64_t *cluster_sector_num,
+                            int *cluster_nb_sectors)
+{
+    BlockDriverInfo bdi;
+    int64_t c;
+
+    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+        *cluster_sector_num = sector_num;
+        *cluster_nb_sectors = nb_sectors;
+        return;
+    }
+
+    /* c is the cluster size expressed in sectors */
+    c = bdi.cluster_size / BDRV_SECTOR_SIZE;
+    *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
+    *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
+                                        nb_sectors, c);
+}
+
+/*
+ * Returns true if the sector range [sector_num, sector_num + nb_sectors)
+ * intersects the range covered by req, false if one of the two ranges lies
+ * entirely before the other.
+ */
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
+                                     int64_t sector_num, int nb_sectors)
+{
+    /* the range must start before req ends, and req must start before the
+     * range ends */
+    return sector_num < req->sector_num + req->nb_sectors &&
+           req->sector_num < sector_num + nb_sectors;
+}
+
+/*
+ * Block the calling coroutine until no tracked request of bs overlaps the
+ * given region, where the region is first widened to cluster boundaries.
+ */
+static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors)
+{
+    BdrvTrackedRequest *req;
+    BdrvTrackedRequest *blocker;
+    int64_t cluster_sector_num;
+    int cluster_nb_sectors;
+
+    /* If we touch the same cluster it counts as an overlap.  This guarantees
+     * that allocating writes will be serialized and not race with each other
+     * for the same cluster.  For example, in copy-on-read it ensures that the
+     * CoR read and write operations are atomic and guest writes cannot
+     * interleave between them.
+     */
+    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+                           &cluster_sector_num, &cluster_nb_sectors);
+
+    for (;;) {
+        /* find the first request in list order that is in our way */
+        blocker = NULL;
+        QLIST_FOREACH(req, &bs->tracked_requests, list) {
+            if (tracked_request_overlaps(req, cluster_sector_num,
+                                         cluster_nb_sectors)) {
+                blocker = req;
+                break;
+            }
+        }
+        if (blocker == NULL) {
+            break;
+        }
+
+        /* Hitting this means there was a reentrant request, for
+         * example, a block driver issuing nested requests.  This must
+         * never happen since it means deadlock.
+         */
+        assert(qemu_coroutine_self() != blocker->co);
+
+        /* wait, then rescan from the start: the list may have changed */
+        qemu_co_queue_wait(&blocker->wait_queue);
+    }
+}
+
+/*
+ * Change the backing file recorded for bs via the format driver, and mirror
+ * the new name and format into bs->backing_file / bs->backing_format when
+ * the driver succeeds (NULL is stored as an empty string).
+ *
+ * Return values:
+ * 0 - success
+ * -EINVAL - backing format specified, but no file
+ * -ENOSPC - can't update the backing file because no space is left in the
+ *           image file header
+ * -ENOTSUP - format driver doesn't support changing the backing file
+ * Any other non-zero value returned by the driver is passed through.
+ */
+int bdrv_change_backing_file(BlockDriverState *bs,
+    const char *backing_file, const char *backing_fmt)
+{
+    BlockDriver *drv = bs->drv;
+    int rc;
+
+    /* Backing file format doesn't make sense without a backing file */
+    if (backing_fmt && !backing_file) {
+        return -EINVAL;
+    }
+
+    if (drv->bdrv_change_backing_file == NULL) {
+        return -ENOTSUP;
+    }
+
+    rc = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
+    if (rc != 0) {
+        return rc;
+    }
+
+    /* keep the in-memory copy in step with what the driver wrote */
+    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
+            backing_file ? backing_file : "");
+    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
+            backing_fmt ? backing_fmt : "");
+    return 0;
+}
+
+/*
+ * Finds the image layer in the chain that has 'bs' as its backing file.
+ *
+ * active is the current topmost image; the chain is followed from there
+ * through the backing_hd pointers.
+ *
+ * Returns NULL if bs is not found in active's image chain,
+ * or if active == bs.
+ */
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs)
+{
+    BlockDriverState *layer;
+
+    assert(active != NULL);
+    assert(bs != NULL);
+
+    /* the topmost image has, by definition, no overlay */
+    if (active == bs) {
+        return NULL;
+    }
+
+    for (layer = active; layer->backing_hd != NULL; layer = layer->backing_hd) {
+        if (layer->backing_hd == bs) {
+            return layer;
+        }
+    }
+
+    return NULL;
+}
+
+typedef struct BlkIntermediateStates {
+ BlockDriverState *bs; /* image layer collected by bdrv_drop_intermediate() for deletion */
+ QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; /* linkage inside states_to_delete */
+} BlkIntermediateStates;
+
+
+/*
+ * Drops images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * Requires that the overlay to 'top' is opened r/w, so that the backing file
+ * information in 'bs' can be properly updated.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * Error conditions:
+ * if active == top, that is considered an error
+ *
+ * Returns 0 on success (including the case where the overlay of 'top'
+ * already has 'base' as its backing file). Returns -EIO if 'top' or 'base'
+ * has no driver, if no overlay of 'top' is found below 'active', or if
+ * 'base' is not reached when walking down from 'top'. A non-zero result of
+ * bdrv_change_backing_file() is passed through.
+ *
+ */
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base)
+{
+ BlockDriverState *intermediate;
+ BlockDriverState *base_bs = NULL;
+ BlockDriverState *new_top_bs = NULL;
+ BlkIntermediateStates *intermediate_state, *next;
+ int ret = -EIO;
+
+ QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+ QSIMPLEQ_INIT(&states_to_delete);
+
+ if (!top->drv || !base->drv) {
+ goto exit;
+ }
+
+ /* new_top_bs is the layer directly above 'top'; it is the one whose
+ * backing file gets redirected to 'base' */
+ new_top_bs = bdrv_find_overlay(active, top);
+
+ if (new_top_bs == NULL) {
+ /* we could not find the image above 'top', this is an error */
+ goto exit;
+ }
+
+ /* special case of new_top_bs->backing_hd already pointing to base - nothing
+ * to do, no intermediate images */
+ if (new_top_bs->backing_hd == base) {
+ ret = 0;
+ goto exit;
+ }
+
+ intermediate = top;
+
+ /* now we will go down through the list, and add each BDS we find
+ * into our deletion queue, until we hit the 'base'
+ */
+ while (intermediate) {
+ intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+ intermediate_state->bs = intermediate;
+ QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+ if (intermediate->backing_hd == base) {
+ base_bs = intermediate->backing_hd;
+ break;
+ }
+ intermediate = intermediate->backing_hd;
+ }
+ if (base_bs == NULL) {
+ /* something went wrong, we did not end at the base. safely
+ * unravel everything, and exit with error */
+ goto exit;
+ }
+
+ /* success - we can delete the intermediate states, and link top->base.
+ * The on-disk backing file information is updated first; nothing has
+ * been deleted yet if that fails */
+ ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+ base_bs->drv ? base_bs->drv->format_name : "");
+ if (ret) {
+ goto exit;
+ }
+ new_top_bs->backing_hd = base_bs;
+
+
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ /* so that bdrv_close() does not recursively close the chain */
+ intermediate_state->bs->backing_hd = NULL;
+ bdrv_delete(intermediate_state->bs);
+ }
+ ret = 0;
+
+/* on every path only the queue entries are freed here; the BlockDriverStates
+ * they point to were deleted above on success and are left alone on error */
+exit:
+ QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+ g_free(intermediate_state);
+ }
+ return ret;
+}
+
+
+/*
+ * Validate a byte-granularity request against bs.
+ *
+ * Returns -ENOMEDIUM if no medium is inserted, 0 without any range check if
+ * bs is growable, -EIO if offset is negative or the range [offset,
+ * offset + size) does not fit within the length reported by
+ * bdrv_getlength(), and 0 otherwise.
+ */
+static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
+                                   size_t size)
+{
+    int64_t image_len;
+
+    if (!bdrv_is_inserted(bs)) {
+        return -ENOMEDIUM;
+    }
+
+    /* growable images may be accessed beyond their current end */
+    if (bs->growable) {
+        return 0;
+    }
+
+    image_len = bdrv_getlength(bs);
+
+    if (offset < 0 || offset > image_len || image_len - offset < size) {
+        return -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * Sector-granularity wrapper around bdrv_check_byte_request(): both the
+ * start sector and the sector count are scaled by BDRV_SECTOR_SIZE.
+ */
+static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
+                              int nb_sectors)
+{
+    int64_t byte_offset = sector_num * BDRV_SECTOR_SIZE;
+    size_t byte_count = nb_sectors * BDRV_SECTOR_SIZE;
+
+    return bdrv_check_byte_request(bs, byte_offset, byte_count);
+}
+
+typedef struct RwCo {
+ BlockDriverState *bs; /* device the request is issued on */
+ int64_t sector_num; /* first sector of the request */
+ int nb_sectors; /* number of sectors to transfer */
+ QEMUIOVector *qiov; /* buffers handed to the read/write routine */
+ bool is_write; /* true: bdrv_co_do_writev(), false: bdrv_co_do_readv() */
+ int ret; /* NOT_DONE until bdrv_rw_co_entry() stores the result */
+} RwCo;
+
+/*
+ * Coroutine entry point for synchronous requests: performs the read or
+ * write described by the RwCo passed as 'opaque' and stores the result in
+ * its 'ret' member.
+ */
+static void coroutine_fn bdrv_rw_co_entry(void *opaque)
+{
+    RwCo *request = opaque;
+
+    if (request->is_write) {
+        request->ret = bdrv_co_do_writev(request->bs, request->sector_num,
+                                         request->nb_sectors, request->qiov,
+                                         0);
+    } else {
+        request->ret = bdrv_co_do_readv(request->bs, request->sector_num,
+                                        request->nb_sectors, request->qiov,
+                                        0);
+    }
+}
+
+/*
+ * Process a vectored synchronous request using coroutines
+ *
+ * The size of qiov must be a multiple of the sector size. When called from
+ * coroutine context the request is executed directly; otherwise a coroutine
+ * is created and qemu_aio_wait() is called until it has stored its result.
+ *
+ * Returns the result of the read/write routine.
+ */
+static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
+                       QEMUIOVector *qiov, bool is_write)
+{
+    Coroutine *worker;
+    RwCo request = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
+        .qiov = qiov,
+        .is_write = is_write,
+        .ret = NOT_DONE,
+    };
+    assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+    /**
+     * In sync call context, when the vcpu is blocked, this throttling timer
+     * will not fire; so the I/O throttling function has to be disabled here
+     * if it has been enabled.
+     */
+    if (bs->io_limits_enabled) {
+        fprintf(stderr, "Disabling I/O throttling on '%s' due "
+                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
+        bdrv_io_limits_disable(bs);
+    }
+
+    if (qemu_in_coroutine()) {
+        /* Fast-path if already in coroutine context */
+        bdrv_rw_co_entry(&request);
+        return request.ret;
+    }
+
+    worker = qemu_coroutine_create(bdrv_rw_co_entry);
+    qemu_coroutine_enter(worker, &request);
+
+    /* run the event loop until the coroutine has stored a result */
+    while (request.ret == NOT_DONE) {
+        qemu_aio_wait();
+    }
+    return request.ret;
+}
+
+/*
+ * Process a synchronous request using coroutines
+ *
+ * Wraps the flat buffer 'buf' (nb_sectors sectors long) in a single-element
+ * I/O vector and hands it to bdrv_rwv_co().
+ */
+static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+                      int nb_sectors, bool is_write)
+{
+    struct iovec single = {
+        .iov_base = (void *)buf,
+        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+    };
+    QEMUIOVector vec;
+
+    qemu_iovec_init_external(&vec, &single, 1);
+
+    return bdrv_rwv_co(bs, sector_num, &vec, is_write);
+}
+
+/*
+ * Synchronously read nb_sectors sectors starting at sector_num into buf.
+ *
+ * Returns < 0 if error. See bdrv_write() for the return codes.
+ */
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+              uint8_t *buf, int nb_sectors)
+{
+    const bool is_write = false;
+
+    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, is_write);
+}
+
+/*
+ * Just like bdrv_read(), but with I/O throttling temporarily disabled.
+ *
+ * Reads nb_sectors sectors starting at sector_num into buf. The
+ * io_limits_enabled flag of bs is cleared for the duration of the read and
+ * restored afterwards. Without this, the synchronous read path would find
+ * throttling enabled, print a message and switch it off for good.
+ *
+ * Returns the result of bdrv_read().
+ */
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+                          uint8_t *buf, int nb_sectors)
+{
+    bool enabled;
+    int ret;
+
+    enabled = bs->io_limits_enabled;
+    bs->io_limits_enabled = false;
+    /* honour the requested range instead of always reading sector 0 */
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    bs->io_limits_enabled = enabled;
+    return ret;
+}
+
+/*
+ * Synchronously write nb_sectors sectors from buf, starting at sector_num.
+ *
+ * Return < 0 if error. Important errors are:
+ *  -EIO         generic I/O error (may happen for all errors)
+ *  -ENOMEDIUM   No media inserted.
+ *  -EINVAL      Invalid sector number or nb_sectors
+ *  -EACCES      Trying to write a read-only device
+ */
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+               const uint8_t *buf, int nb_sectors)
+{
+    /* the shared helper takes a non-const buffer for reads and writes */
+    uint8_t *data = (uint8_t *)buf;
+
+    return bdrv_rw_co(bs, sector_num, data, nb_sectors, true);
+}
+
+/*
+ * Synchronously write the contents of the I/O vector qiov to bs, starting at
+ * sector_num. Returns the result of bdrv_rwv_co().
+ */
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
+{
+    const bool is_write = true;
+
+    return bdrv_rwv_co(bs, sector_num, qiov, is_write);
+}
+
+/*
+ * Read count1 bytes from bs at byte position 'offset' into buf, where
+ * neither the position nor the length needs to be sector aligned.
+ *
+ * The request is split into up to three parts: a leading partial sector and
+ * a trailing partial sector, both read through a bounce buffer, and the
+ * whole sectors in between, which are read straight into buf.
+ *
+ * Returns count1 on success, or the negative value returned by bdrv_read().
+ */
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+               void *buf, int count1)
+{
+    uint8_t bounce[BDRV_SECTOR_SIZE];
+    uint8_t *dst = buf;
+    int64_t sector = offset >> BDRV_SECTOR_BITS;
+    int remaining = count1;
+    int chunk, whole_sectors;
+    int ret;
+
+    /* leading part: bytes up to the next sector boundary */
+    chunk = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
+    if (chunk > remaining) {
+        chunk = remaining;
+    }
+    if (chunk > 0) {
+        ret = bdrv_read(bs, sector, bounce, 1);
+        if (ret < 0) {
+            return ret;
+        }
+        memcpy(dst, bounce + (offset & (BDRV_SECTOR_SIZE - 1)), chunk);
+        remaining -= chunk;
+        if (remaining == 0) {
+            return count1;
+        }
+        sector++;
+        dst += chunk;
+    }
+
+    /* middle part: whole sectors are read "in place" */
+    whole_sectors = remaining >> BDRV_SECTOR_BITS;
+    if (whole_sectors > 0) {
+        ret = bdrv_read(bs, sector, dst, whole_sectors);
+        if (ret < 0) {
+            return ret;
+        }
+        sector += whole_sectors;
+        chunk = whole_sectors << BDRV_SECTOR_BITS;
+        dst += chunk;
+        remaining -= chunk;
+    }
+
+    /* trailing part: what is left of the last sector */
+    if (remaining > 0) {
+        ret = bdrv_read(bs, sector, bounce, 1);
+        if (ret < 0) {
+            return ret;
+        }
+        memcpy(dst, bounce, remaining);
+    }
+    return count1;
+}
+
+/*
+ * Byte-granularity synchronous write of all qiov->size bytes of 'qiov'
+ * starting at byte 'offset'.
+ *
+ * Partially covered sectors at the head and tail are handled by
+ * read-modify-write through tmp_buf (bdrv_read() + bdrv_write()); the whole
+ * sectors in between are written with bdrv_writev() from a temporary vector
+ * built with qemu_iovec_concat().
+ *
+ * Returns qiov->size on success, or the negative value returned by the
+ * failing bdrv_read()/bdrv_write()/bdrv_writev() call.
+ */
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+{
+ uint8_t tmp_buf[BDRV_SECTOR_SIZE];
+ int len, nb_sectors, count;
+ int64_t sector_num;
+ int ret;
+
+ count = qiov->size;
+
+ /* first write to align to sector start */
+ /* len = bytes from offset up to the next sector boundary (0 if aligned) */
+ len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
+ if (len > count)
+ len = count;
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ if (len > 0) {
+ /* Read-modify-write of the first, partially covered sector */
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
+ len);
+ if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ count -= len;
+ if (count == 0)
+ return qiov->size;
+ sector_num++;
+ }
+
+ /* write the sectors "in place" */
+ nb_sectors = count >> BDRV_SECTOR_BITS;
+ if (nb_sectors > 0) {
+ QEMUIOVector qiov_inplace;
+
+ /* 'len' still holds the head size, i.e. where the aligned data starts
+ * inside qiov */
+ qemu_iovec_init(&qiov_inplace, qiov->niov);
+ qemu_iovec_concat(&qiov_inplace, qiov, len,
+ nb_sectors << BDRV_SECTOR_BITS);
+ ret = bdrv_writev(bs, sector_num, &qiov_inplace);
+ qemu_iovec_destroy(&qiov_inplace);
+ if (ret < 0) {
+ return ret;
+ }
+
+ sector_num += nb_sectors;
+ len = nb_sectors << BDRV_SECTOR_BITS;
+ count -= len;
+ }
+
+ /* add data from the last sector */
+ if (count > 0) {
+ /* Read-modify-write of the last, partially covered sector; the tail
+ * is the final 'count' bytes of qiov */
+ if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
+ if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
+ return ret;
+ }
+ return qiov->size;
+}
+
+/*
+ * Byte-granularity synchronous write of 'count1' bytes from 'buf' at byte
+ * 'offset'. Wraps the buffer in a one-element vector and returns the result
+ * of bdrv_pwritev().
+ */
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+                const void *buf, int count1)
+{
+    struct iovec single;
+    QEMUIOVector vec;
+
+    single.iov_base = (void *) buf;
+    single.iov_len = count1;
+
+    qemu_iovec_init_external(&vec, &single, 1);
+    return bdrv_pwritev(bs, offset, &vec);
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases. A failure of the flush that
+ * implements the barrier is reported to the caller too, instead of being
+ * silently dropped.
+ */
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+                     const void *buf, int count)
+{
+    int ret;
+
+    ret = bdrv_pwrite(bs, offset, buf, count);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* No flush needed for cache modes that already do it */
+    if (bs->enable_write_cache) {
+        ret = bdrv_flush(bs);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Copy-on-read helper: reads the cluster-aligned range enclosing the request
+ * into a bounce buffer, writes that range back through the same driver (via
+ * bdrv_co_do_write_zeroes() when the data is all zero and the driver provides
+ * bdrv_co_write_zeroes), then copies the requested part into 'qiov'.
+ *
+ * Returns the negative result of the failing read or write step, otherwise
+ * the result of the write step. The bounce buffer is freed on every path.
+ */
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ /* Perform I/O through a temporary buffer so that users who scribble over
+ * their read buffer while the operation is in progress do not end up
+ * modifying the image file. This is critical for zero-copy guest I/O
+ * where anything might happen inside guest memory.
+ */
+ void *bounce_buffer;
+
+ BlockDriver *drv = bs->drv;
+ struct iovec iov;
+ QEMUIOVector bounce_qiov;
+ int64_t cluster_sector_num;
+ int cluster_nb_sectors;
+ size_t skip_bytes;
+ int ret;
+
+ /* Cover entire cluster so no additional backing file I/O is required when
+ * allocating cluster in the image file.
+ */
+ bdrv_round_to_clusters(bs, sector_num, nb_sectors,
+ &cluster_sector_num, &cluster_nb_sectors);
+
+ trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
+ cluster_sector_num, cluster_nb_sectors);
+
+ /* One contiguous bounce buffer sized for the whole rounded range */
+ iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
+ qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+ ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
+ &bounce_qiov);
+ if (ret < 0) {
+ goto err;
+ }
+
+ if (drv->bdrv_co_write_zeroes &&
+ buffer_is_zero(bounce_buffer, iov.iov_len)) {
+ ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
+ cluster_nb_sectors);
+ } else {
+ /* This does not change the data on the disk, it is not necessary
+ * to flush even in cache=writethrough mode.
+ */
+ ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
+ &bounce_qiov);
+ }
+
+ if (ret < 0) {
+ /* It might be okay to ignore write errors for guest requests. If this
+ * is a deliberate copy-on-read then we don't want to ignore the error.
+ * Simply report it in all cases.
+ */
+ goto err;
+ }
+
+ /* Hand back only the sectors that were asked for: skip the part of the
+ * rounded range that lies before sector_num */
+ skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
+ nb_sectors * BDRV_SECTOR_SIZE);
+
+ /* Reached on success as well as on failure */
+err:
+ qemu_vfree(bounce_buffer);
+ return ret;
+}
+
+/*
+ * Handle a read request in coroutine context
+ *
+ * Returns -ENOMEDIUM when bs has no driver, -EIO when bdrv_check_request()
+ * rejects the range, otherwise the result of the driver read or of the
+ * copy-on-read path. 'flags' may carry BDRV_REQ_COPY_ON_READ; the flag is
+ * also forced when bs->copy_on_read is set.
+ */
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+ int ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ /* throttling disk read I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, false, nb_sectors);
+ }
+
+ if (bs->copy_on_read) {
+ flags |= BDRV_REQ_COPY_ON_READ;
+ }
+ /* Counted here and dropped again at 'out' below */
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ bs->copy_on_read_in_flight++;
+ }
+
+ if (bs->copy_on_read_in_flight) {
+ wait_for_overlapping_requests(bs, sector_num, nb_sectors);
+ }
+
+ tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
+
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ int pnum;
+
+ ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Take the copy-on-read path unless the whole range is reported as
+ * allocated */
+ if (!ret || pnum != nb_sectors) {
+ ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ goto out;
+ }
+ }
+
+ ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+
+out:
+ tracked_request_end(&req);
+
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ bs->copy_on_read_in_flight--;
+ }
+
+ return ret;
+}
+
+/*
+ * Coroutine read entry point: traces the request and forwards it to
+ * bdrv_co_do_readv() with no request flags set.
+ */
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+                               int nb_sectors, QEMUIOVector *qiov)
+{
+    const BdrvRequestFlags no_flags = 0;
+
+    trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, no_flags);
+}
+
+/*
+ * Coroutine read entry point that always requests copy-on-read: traces the
+ * request and forwards it to bdrv_co_do_readv() with BDRV_REQ_COPY_ON_READ.
+ */
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    const BdrvRequestFlags cor_flags = BDRV_REQ_COPY_ON_READ;
+
+    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
+    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, cor_flags);
+}
+
+/*
+ * Write 'nb_sectors' zeroed sectors starting at 'sector_num'.
+ *
+ * Uses the driver's bdrv_co_write_zeroes callback when it exists and does
+ * not answer -ENOTSUP; otherwise writes a zero-filled bounce buffer through
+ * bdrv_co_writev. Returns the result of whichever driver call was used.
+ */
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors)
+{
+    BlockDriver *drv = bs->drv;
+    QEMUIOVector zero_qiov;
+    struct iovec zero_iov;
+    int ret;
+
+    /* TODO Emulate only part of misaligned requests instead of letting block
+     * drivers return -ENOTSUP and emulate everything */
+
+    /* First try the efficient write zeroes operation */
+    if (drv->bdrv_co_write_zeroes) {
+        ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+        if (ret != -ENOTSUP) {
+            return ret;
+        }
+    }
+
+    /* Fall back to bounce buffer if write zeroes is unsupported */
+    zero_iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+    zero_iov.iov_base = qemu_blockalign(bs, zero_iov.iov_len);
+    memset(zero_iov.iov_base, 0, zero_iov.iov_len);
+    qemu_iovec_init_external(&zero_qiov, &zero_iov, 1);
+
+    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &zero_qiov);
+
+    qemu_vfree(zero_iov.iov_base);
+    return ret;
+}
+
+/*
+ * Handle a write request in coroutine context
+ *
+ * Returns -ENOMEDIUM when bs has no driver, -EACCES when bs is read-only,
+ * -EIO when bdrv_check_request() rejects the range, a negative value from a
+ * before-write notifier, or the result of the driver write (and of the
+ * following flush when the write cache is disabled).
+ * With BDRV_REQ_ZERO_WRITE in 'flags', 'qiov' is not used.
+ */
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+ int ret;
+
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ }
+ if (bs->read_only) {
+ return -EACCES;
+ }
+ if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+ return -EIO;
+ }
+
+ /* throttling disk write I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, true, nb_sectors);
+ }
+
+ if (bs->copy_on_read_in_flight) {
+ wait_for_overlapping_requests(bs, sector_num, nb_sectors);
+ }
+
+ tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
+
+ ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+
+ if (ret < 0) {
+ /* Do nothing, write notifier decided to fail this request */
+ } else if (flags & BDRV_REQ_ZERO_WRITE) {
+ ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ } else {
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ }
+
+ /* Without a write cache every successful write is followed by a flush */
+ if (ret == 0 && !bs->enable_write_cache) {
+ ret = bdrv_co_flush(bs);
+ }
+
+ /* NOTE(review): the dirty bitmap and wr_highest_sector below are updated
+ * regardless of ret - confirm that this is intended for failed writes */
+ if (bs->dirty_bitmap) {
+ bdrv_set_dirty(bs, sector_num, nb_sectors);
+ }
+
+ if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
+ bs->wr_highest_sector = sector_num + nb_sectors - 1;
+ }
+
+ tracked_request_end(&req);
+
+ return ret;
+}
+
+/*
+ * Coroutine write entry point: traces the request and forwards it to
+ * bdrv_co_do_writev() with no request flags set.
+ */
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+                                int nb_sectors, QEMUIOVector *qiov)
+{
+    const BdrvRequestFlags no_flags = 0;
+
+    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, no_flags);
+}
+
+/*
+ * Coroutine zero-write entry point: traces the request and forwards it to
+ * bdrv_co_do_writev() with BDRV_REQ_ZERO_WRITE and no data vector.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
+                                      int64_t sector_num, int nb_sectors)
+{
+    QEMUIOVector *no_data = NULL;
+
+    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+    return bdrv_co_do_writev(bs, sector_num, nb_sectors, no_data,
+                             BDRV_REQ_ZERO_WRITE);
+}
+
+/**
+ * Truncate file to 'offset' bytes (needed only for file protocols)
+ *
+ * Returns -ENOMEDIUM without a driver, -ENOTSUP when the driver has no
+ * bdrv_truncate callback, -EACCES for a read-only device, -EBUSY when the
+ * device is in use, a non-zero driver result, or the result of
+ * refresh_total_sectors() after a successful driver truncate.
+ */
+int bdrv_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BlockDriver *drv = bs->drv;
+    int ret;
+
+    if (!drv) {
+        return -ENOMEDIUM;
+    }
+    if (!drv->bdrv_truncate) {
+        return -ENOTSUP;
+    }
+    if (bs->read_only) {
+        return -EACCES;
+    }
+    if (bdrv_in_use(bs)) {
+        return -EBUSY;
+    }
+
+    ret = drv->bdrv_truncate(bs, offset);
+    if (ret != 0) {
+        return ret;
+    }
+
+    /* Driver succeeded: refresh the cached size and notify the device */
+    ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
+    bdrv_dev_resize_cb(bs);
+    return ret;
+}
+
+/**
+ * Length of an allocated file in bytes. Sparse files are counted by actual
+ * allocated space. Return < 0 if error or unknown.
+ *
+ * When the driver has no bdrv_get_allocated_file_size callback the query is
+ * forwarded to bs->file if present; otherwise -ENOTSUP is returned.
+ */
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (driver == NULL) {
+        return -ENOMEDIUM;
+    }
+    if (driver->bdrv_get_allocated_file_size != NULL) {
+        return driver->bdrv_get_allocated_file_size(bs);
+    }
+    if (bs->file == NULL) {
+        return -ENOTSUP;
+    }
+    return bdrv_get_allocated_file_size(bs->file);
+}
+
+/**
+ * Length of a file in bytes. Return < 0 if error or unknown.
+ *
+ * The driver's bdrv_getlength callback is consulted only for growable
+ * devices or devices with removable media; otherwise the cached
+ * total_sectors count is used.
+ */
+int64_t bdrv_getlength(BlockDriverState *bs)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (driver == NULL) {
+        return -ENOMEDIUM;
+    }
+
+    if ((bs->growable || bdrv_dev_has_removable_media(bs)) &&
+        driver->bdrv_getlength) {
+        return driver->bdrv_getlength(bs);
+    }
+
+    return bs->total_sectors * BDRV_SECTOR_SIZE;
+}
+
+/*
+ * Store the device size in sectors in *nb_sectors_ptr.
+ * Stores 0 if no device is present or bdrv_getlength() reports an error.
+ */
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
+{
+    int64_t bytes = bdrv_getlength(bs);
+
+    *nb_sectors_ptr = (bytes < 0) ? 0 : (bytes >> BDRV_SECTOR_BITS);
+}
+
+/*
+ * Throttling disk I/O limits: copy *io_limits into bs and recompute
+ * bs->io_limits_enabled from the new values.
+ */
+void bdrv_set_io_limits(BlockDriverState *bs,
+                        BlockIOLimit *io_limits)
+{
+    bool enabled;
+
+    bs->io_limits = *io_limits;
+
+    /* Evaluated after the copy so that it sees the new limits */
+    enabled = bdrv_io_limits_enabled(bs);
+    bs->io_limits_enabled = enabled;
+}
+
+/* Record the error policies to apply to failed reads and failed writes. */
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error)
+{
+    bs->on_write_error = on_write_error;
+    bs->on_read_error = on_read_error;
+}
+
+/* Return the configured error policy for reads (is_read) or for writes. */
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
+{
+    if (is_read) {
+        return bs->on_read_error;
+    }
+    return bs->on_write_error;
+}
+
+/*
+ * Map the configured read or write error policy and the positive errno
+ * value 'error' to the action to take. Under the ENOSPC policy only ENOSPC
+ * stops; every other error is reported. Aborts on an unknown policy value.
+ */
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
+{
+    BlockdevOnError policy = is_read ? bs->on_read_error : bs->on_write_error;
+
+    if (policy == BLOCKDEV_ON_ERROR_ENOSPC) {
+        if (error == ENOSPC) {
+            return BDRV_ACTION_STOP;
+        }
+        return BDRV_ACTION_REPORT;
+    }
+    if (policy == BLOCKDEV_ON_ERROR_STOP) {
+        return BDRV_ACTION_STOP;
+    }
+    if (policy == BLOCKDEV_ON_ERROR_REPORT) {
+        return BDRV_ACTION_REPORT;
+    }
+    if (policy == BLOCKDEV_ON_ERROR_IGNORE) {
+        return BDRV_ACTION_IGNORE;
+    }
+
+    abort();
+}
+
+/*
+ * Carry out 'action' for an I/O error: always emits the
+ * QEVENT_BLOCK_IO_ERROR event and, for BDRV_ACTION_STOP, stops the VM and
+ * records 'error' in the iostatus. 'error' must be non-negative.
+ *
+ * This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+                       bool is_read, int error)
+{
+    assert(error >= 0);
+
+    bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
+    if (action != BDRV_ACTION_STOP) {
+        return;
+    }
+
+    vm_stop(RUN_STATE_IO_ERROR);
+    bdrv_iostatus_set_err(bs, error);
+}
+
+/* Report the read_only flag of 'bs'. */
+int bdrv_is_read_only(BlockDriverState *bs)
+{
+    int read_only = bs->read_only;
+
+    return read_only;
+}
+
+/* Report the sg flag of 'bs'. */
+int bdrv_is_sg(BlockDriverState *bs)
+{
+    int sg = bs->sg;
+
+    return sg;
+}
+
+/* Report whether the write cache is enabled for 'bs'. */
+int bdrv_enable_write_cache(BlockDriverState *bs)
+{
+    int wce = bs->enable_write_cache;
+
+    return wce;
+}
+
+/*
+ * Enable or disable the write cache of 'bs' and mirror the setting in
+ * BDRV_O_CACHE_WB of bs->open_flags.
+ */
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
+{
+    bs->enable_write_cache = wce;
+
+    /* so a reopen() will preserve wce */
+    if (!wce) {
+        bs->open_flags &= ~BDRV_O_CACHE_WB;
+        return;
+    }
+    bs->open_flags |= BDRV_O_CACHE_WB;
+}
+
+/*
+ * Returns 1 when the backing image is encrypted, otherwise the encrypted
+ * flag of 'bs' itself.
+ */
+int bdrv_is_encrypted(BlockDriverState *bs)
+{
+    BlockDriverState *backing = bs->backing_hd;
+
+    if (backing != NULL && backing->encrypted) {
+        return 1;
+    }
+    return bs->encrypted;
+}
+
+/*
+ * Returns non-zero when 'bs' or its backing image is encrypted and does not
+ * have a valid key yet.
+ */
+int bdrv_key_required(BlockDriverState *bs)
+{
+    BlockDriverState *backing = bs->backing_hd;
+
+    if (backing != NULL) {
+        if (backing->encrypted && !backing->valid_key) {
+            return 1;
+        }
+    }
+    return (bs->encrypted && !bs->valid_key);
+}
+
+/*
+ * Set the encryption key of 'bs', first applying it to an encrypted backing
+ * image.
+ *
+ * Returns the backing image's negative result if that fails, 0 when only
+ * the backing image is encrypted, -EINVAL when 'bs' is not encrypted,
+ * -ENOMEDIUM when there is no driver or no bdrv_set_key callback, otherwise
+ * the driver's result. valid_key is cleared on driver failure.
+ */
+int bdrv_set_key(BlockDriverState *bs, const char *key)
+{
+    BlockDriverState *backing = bs->backing_hd;
+    int ret;
+
+    if (backing && backing->encrypted) {
+        ret = bdrv_set_key(backing, key);
+        if (ret < 0) {
+            return ret;
+        }
+        if (!bs->encrypted) {
+            return 0;
+        }
+    }
+
+    if (!bs->encrypted) {
+        return -EINVAL;
+    }
+    if (!bs->drv || !bs->drv->bdrv_set_key) {
+        return -ENOMEDIUM;
+    }
+
+    ret = bs->drv->bdrv_set_key(bs, key);
+    if (ret < 0) {
+        bs->valid_key = 0;
+        return ret;
+    }
+
+    if (!bs->valid_key) {
+        bs->valid_key = 1;
+        /* call the change callback now, we skipped it on open */
+        bdrv_dev_change_media_cb(bs, true);
+    }
+    return ret;
+}
+
+/* Return the driver's format_name, or NULL when 'bs' has no driver. */
+const char *bdrv_get_format_name(BlockDriverState *bs)
+{
+    if (!bs->drv) {
+        return NULL;
+    }
+    return bs->drv->format_name;
+}
+
+/*
+ * Call it(opaque, format_name) once for every driver on the bdrv_drivers
+ * list.
+ */
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+                         void *opaque)
+{
+    BlockDriver *driver;
+
+    QLIST_FOREACH(driver, &bdrv_drivers, list) {
+        const char *format = driver->format_name;
+
+        it(opaque, format);
+    }
+}
+
+/*
+ * Look up a block device on the bdrv_states list by device name.
+ * Returns the first match, or NULL when no device has that name.
+ */
+BlockDriverState *bdrv_find(const char *name)
+{
+    BlockDriverState *candidate;
+
+    QTAILQ_FOREACH(candidate, &bdrv_states, list) {
+        if (strcmp(name, candidate->device_name) == 0) {
+            return candidate;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Iterate over bdrv_states: NULL yields the first device, any other 'bs'
+ * yields the entry that follows it on the list.
+ */
+BlockDriverState *bdrv_next(BlockDriverState *bs)
+{
+    return bs ? QTAILQ_NEXT(bs, list) : QTAILQ_FIRST(&bdrv_states);
+}
+
+/* Call it(opaque, bs) once for every device on the bdrv_states list. */
+void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
+{
+    BlockDriverState *cur;
+
+    QTAILQ_FOREACH(cur, &bdrv_states, list) {
+        it(opaque, cur);
+    }
+}
+
+/* Return the device name stored in 'bs' (not a copy). */
+const char *bdrv_get_device_name(BlockDriverState *bs)
+{
+    const char *device_name = bs->device_name;
+
+    return device_name;
+}
+
+/* Return the open flags currently recorded for 'bs'. */
+int bdrv_get_flags(BlockDriverState *bs)
+{
+    int flags = bs->open_flags;
+
+    return flags;
+}
+
+/*
+ * Flush every device on the bdrv_states list. All devices are flushed even
+ * after a failure; the first negative bdrv_flush() result is returned, or 0
+ * when every flush succeeded.
+ */
+int bdrv_flush_all(void)
+{
+    BlockDriverState *bs;
+    int first_error = 0;
+
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        int ret = bdrv_flush(bs);
+
+        if (first_error == 0 && ret < 0) {
+            first_error = ret;
+        }
+    }
+
+    return first_error;
+}
+
+/*
+ * bdrv_has_zero_init-style callback that always answers 1.
+ * 'bs' is not examined.
+ */
+int bdrv_has_zero_init_1(BlockDriverState *bs)
+{
+    const int always_zero_init = 1;
+
+    return always_zero_init;
+}
+
+/*
+ * Ask the driver's bdrv_has_zero_init callback; returns 0 when the driver
+ * has none. 'bs' must have a driver.
+ */
+int bdrv_has_zero_init(BlockDriverState *bs)
+{
+    assert(bs->drv);
+
+    if (!bs->drv->bdrv_has_zero_init) {
+        /* safe default */
+        return 0;
+    }
+
+    return bs->drv->bdrv_has_zero_init(bs);
+}
+
+/*
+ * Argument/result bundle passed from the synchronous bdrv_is_allocated() and
+ * bdrv_is_allocated_above() wrappers to their coroutine entry functions.
+ */
+typedef struct BdrvCoIsAllocatedData {
+ /* image to query; the top image for the _above variant */
+ BlockDriverState *bs;
+ /* only set by bdrv_is_allocated_above() */
+ BlockDriverState *base;
+ int64_t sector_num;
+ int nb_sectors;
+ /* caller's output pointer, handed through to the coroutine function */
+ int *pnum;
+ /* return value of the coroutine function */
+ int ret;
+ /* set by the entry function once 'ret' is valid */
+ bool done;
+} BdrvCoIsAllocatedData;
+
+/*
+ * Returns true iff the specified sector is present in the disk image. Drivers
+ * not implementing the functionality are assumed to not support backing files,
+ * hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, int *pnum)
+{
+    int64_t remaining;
+
+    if (sector_num >= bs->total_sectors) {
+        *pnum = 0;
+        return 0;
+    }
+
+    /* Clamp the request to the sectors left before the end of the image */
+    remaining = bs->total_sectors - sector_num;
+    if (remaining < nb_sectors) {
+        nb_sectors = remaining;
+    }
+
+    if (bs->drv->bdrv_co_is_allocated) {
+        return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors,
+                                             pnum);
+    }
+
+    /* No driver support: everything counts as allocated */
+    *pnum = nb_sectors;
+    return 1;
+}
+
+/*
+ * Coroutine wrapper for bdrv_is_allocated(): runs bdrv_co_is_allocated()
+ * with the arguments stored in the BdrvCoIsAllocatedData at 'opaque', then
+ * stores the result and marks the request as done.
+ */
+static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
+{
+    BdrvCoIsAllocatedData *req = opaque;
+
+    req->ret = bdrv_co_is_allocated(req->bs, req->sector_num,
+                                    req->nb_sectors, req->pnum);
+    req->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_is_allocated().
+ *
+ * Runs the query in a new coroutine and calls qemu_aio_wait() until it has
+ * finished. See bdrv_co_is_allocated() for details.
+ */
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+                      int *pnum)
+{
+    BdrvCoIsAllocatedData query = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .pnum = pnum,
+        .done = false,
+    };
+    Coroutine *co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
+
+    qemu_coroutine_enter(co, &query);
+    while (!query.done) {
+        qemu_aio_wait();
+    }
+
+    return query.ret;
+}
+
+/*
+ * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
+ *
+ * Return true if the given sector is allocated in any image between
+ * BASE and TOP (inclusive). BASE can be NULL to check if the given
+ * sector is allocated in any image of the chain. Return false otherwise.
+ * A negative value from bdrv_co_is_allocated() is returned unchanged.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * NOTE(review): the loop below stops before querying BASE itself
+ * (intermediate != base), which does not match "inclusive" above - confirm
+ * which one is intended.
+ */
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BlockDriverState *intermediate;
+ /* n: smallest unallocated run seen so far, starts at the full request */
+ int ret, n = nb_sectors;
+
+ intermediate = top;
+ while (intermediate && intermediate != base) {
+ int pnum_inter;
+ ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors,
+ &pnum_inter);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ /* First image with the sector allocated decides the answer */
+ *pnum = pnum_inter;
+ return 1;
+ }
+
+ /*
+ * [sector_num, nb_sectors] is unallocated on top but intermediate
+ * might have
+ *
+ * [sector_num+x, nr_sectors] allocated.
+ */
+ /* For images below top, a short run is ignored when it merely ends at
+ * or beyond that image's total_sectors */
+ if (n > pnum_inter &&
+ (intermediate == top ||
+ sector_num + pnum_inter < intermediate->total_sectors)) {
+ n = pnum_inter;
+ }
+
+ intermediate = intermediate->backing_hd;
+ }
+
+ *pnum = n;
+ return 0;
+}
+
+/*
+ * Coroutine wrapper for bdrv_is_allocated_above(): runs
+ * bdrv_co_is_allocated_above() with the arguments stored in the
+ * BdrvCoIsAllocatedData at 'opaque' (bs holds the top image), then stores
+ * the result and marks the request as done.
+ */
+static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque)
+{
+    BdrvCoIsAllocatedData *req = opaque;
+
+    req->ret = bdrv_co_is_allocated_above(req->bs, req->base,
+                                          req->sector_num, req->nb_sectors,
+                                          req->pnum);
+    req->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_is_allocated_above().
+ *
+ * Runs the query in a new coroutine and calls qemu_aio_wait() until it has
+ * finished. See bdrv_co_is_allocated_above() for details.
+ */
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+                            int64_t sector_num, int nb_sectors, int *pnum)
+{
+    BdrvCoIsAllocatedData query = {
+        .bs = top,
+        .base = base,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .pnum = pnum,
+        .done = false,
+    };
+    Coroutine *co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry);
+
+    qemu_coroutine_enter(co, &query);
+    while (!query.done) {
+        qemu_aio_wait();
+    }
+
+    return query.ret;
+}
+
+/*
+ * Return the file name of the encrypted image: the backing file name when
+ * the backing image is encrypted, else bs->filename when 'bs' is encrypted,
+ * else NULL.
+ */
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
+{
+    if (bs->backing_hd && bs->backing_hd->encrypted) {
+        return bs->backing_file;
+    }
+    if (bs->encrypted) {
+        return bs->filename;
+    }
+    return NULL;
+}
+
+/*
+ * Copy the backing file name of 'bs' into 'filename', a buffer of
+ * 'filename_size' bytes, using pstrcpy().
+ */
+void bdrv_get_backing_filename(BlockDriverState *bs,
+                               char *filename, int filename_size)
+{
+    const char *backing = bs->backing_file;
+
+    pstrcpy(filename, filename_size, backing);
+}
+
+/*
+ * Write 'nb_sectors' sectors from 'buf' through the driver's
+ * bdrv_write_compressed callback.
+ *
+ * Returns -ENOMEDIUM without a driver, -ENOTSUP without the callback, -EIO
+ * when bdrv_check_request() rejects the range, else the driver's result.
+ * Must not be used while a dirty bitmap is active (asserted).
+ */
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                          const uint8_t *buf, int nb_sectors)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (driver == NULL) {
+        return -ENOMEDIUM;
+    }
+    if (driver->bdrv_write_compressed == NULL) {
+        return -ENOTSUP;
+    }
+    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+        return -EIO;
+    }
+
+    assert(!bs->dirty_bitmap);
+
+    return driver->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+}
+
+/*
+ * Fill *bdi through the driver's bdrv_get_info callback; *bdi is zeroed
+ * before the callback runs.
+ *
+ * Returns -ENOMEDIUM without a driver and -ENOTSUP without the callback (in
+ * both cases *bdi is left untouched), else the driver's result.
+ */
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (driver == NULL) {
+        return -ENOMEDIUM;
+    }
+    if (driver->bdrv_get_info == NULL) {
+        return -ENOTSUP;
+    }
+
+    memset(bdi, 0, sizeof(*bdi));
+    return driver->bdrv_get_info(bs, bdi);
+}
+
+/*
+ * Save 'size' bytes of VM state from 'buf' at position 'pos'. Wraps the
+ * buffer in a one-element vector and returns the result of
+ * bdrv_writev_vmstate().
+ */
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+                      int64_t pos, int size)
+{
+    struct iovec single;
+    QEMUIOVector vec;
+
+    single.iov_base = (void *) buf;
+    single.iov_len = size;
+
+    qemu_iovec_init_external(&vec, &single, 1);
+    return bdrv_writev_vmstate(bs, &vec, pos);
+}
+
+/*
+ * Save the VM state in 'qiov' at position 'pos'.
+ *
+ * Uses the driver's bdrv_save_vmstate callback when present, otherwise
+ * forwards the request to bs->file. Returns -ENOMEDIUM without a driver and
+ * -ENOTSUP when neither the callback nor bs->file is available.
+ */
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (driver == NULL) {
+        return -ENOMEDIUM;
+    }
+    if (driver->bdrv_save_vmstate != NULL) {
+        return driver->bdrv_save_vmstate(bs, qiov, pos);
+    }
+    if (bs->file != NULL) {
+        return bdrv_writev_vmstate(bs->file, qiov, pos);
+    }
+
+    return -ENOTSUP;
+}
+
+/*
+ * Load 'size' bytes of VM state from position 'pos' into 'buf'.
+ *
+ * Uses the driver's bdrv_load_vmstate callback when present, otherwise
+ * forwards the request to bs->file. Returns -ENOMEDIUM without a driver and
+ * -ENOTSUP when neither the callback nor bs->file is available.
+ */
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+                      int64_t pos, int size)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (driver == NULL) {
+        return -ENOMEDIUM;
+    }
+    if (driver->bdrv_load_vmstate != NULL) {
+        return driver->bdrv_load_vmstate(bs, buf, pos, size);
+    }
+    if (bs->file != NULL) {
+        return bdrv_load_vmstate(bs->file, buf, pos, size);
+    }
+
+    return -ENOTSUP;
+}
+
+/*
+ * Forward 'event' to the driver's bdrv_debug_event callback. Does nothing
+ * when 'bs' is NULL, has no driver, or the driver has no such callback.
+ */
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
+{
+    if (bs && bs->drv && bs->drv->bdrv_debug_event) {
+        bs->drv->bdrv_debug_event(bs, event);
+    }
+}
+
+/*
+ * Walk down the bs->file chain to the first node whose driver implements
+ * bdrv_debug_breakpoint and call it with 'event' and 'tag'.
+ * Returns -ENOTSUP when the chain ends (NULL node or node without a driver)
+ * before such a driver is found.
+ */
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+                          const char *tag)
+{
+    for (; bs && bs->drv; bs = bs->file) {
+        if (bs->drv->bdrv_debug_breakpoint) {
+            return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
+        }
+    }
+
+    return -ENOTSUP;
+}
+
+/*
+ * Walk down the bs->file chain to the first node whose driver implements
+ * bdrv_debug_resume and call it with 'tag'.
+ * Returns -ENOTSUP when the chain ends (NULL node or node without a driver)
+ * before such a driver is found.
+ */
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
+{
+    for (; bs && bs->drv; bs = bs->file) {
+        if (bs->drv->bdrv_debug_resume) {
+            return bs->drv->bdrv_debug_resume(bs, tag);
+        }
+    }
+
+    return -ENOTSUP;
+}
+
+/*
+ * Walk down the bs->file chain to the first node whose driver implements
+ * bdrv_debug_is_suspended and return its answer for 'tag'.
+ * Returns false when the chain ends (NULL node or node without a driver)
+ * before such a driver is found.
+ */
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
+{
+    for (; bs && bs->drv; bs = bs->file) {
+        if (bs->drv->bdrv_debug_is_suspended) {
+            return bs->drv->bdrv_debug_is_suspended(bs, tag);
+        }
+    }
+
+    return false;
+}
+
+/* Returns 1 when BDRV_O_SNAPSHOT is set in bs->open_flags, else 0. */
+int bdrv_is_snapshot(BlockDriverState *bs)
+{
+    return (bs->open_flags & BDRV_O_SNAPSHOT) != 0;
+}
+
+/* Search the backing chain of 'bs' for the image whose backing file name
+ * matches 'backing_file' and return that backing BlockDriverState, or NULL
+ * if there is no match or if bs, bs->drv or backing_file is NULL.
+ *
+ * backing_file can either be relative, or absolute, or a protocol. If it is
+ * relative, it must be relative to the chain. So, passing in bs->filename
+ * from a BDS as backing_file should not be done, as that may be relative to
+ * the CWD rather than the chain. */
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file)
+{
+ char *filename_full = NULL;
+ char *backing_file_full = NULL;
+ char *filename_tmp = NULL;
+ int is_protocol = 0;
+ BlockDriverState *curr_bs = NULL;
+ BlockDriverState *retval = NULL;
+
+ if (!bs || !bs->drv || !backing_file) {
+ return NULL;
+ }
+
+ /* Scratch path buffers, released at the end of the function */
+ filename_full = g_malloc(PATH_MAX);
+ backing_file_full = g_malloc(PATH_MAX);
+ filename_tmp = g_malloc(PATH_MAX);
+
+ is_protocol = path_has_protocol(backing_file);
+
+ /* Visit every image in the chain that has a backing image */
+ for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
+
+ /* If either of the filename paths is actually a protocol, then
+ * compare unmodified paths; otherwise make paths relative */
+ if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
+ if (strcmp(backing_file, curr_bs->backing_file) == 0) {
+ retval = curr_bs->backing_hd;
+ break;
+ }
+ } else {
+ /* If not an absolute filename path, make it relative to the current
+ * image's filename path */
+ path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+ backing_file);
+
+ /* We are going to compare absolute pathnames */
+ /* A path realpath() cannot resolve just skips this chain entry */
+ if (!realpath(filename_tmp, filename_full)) {
+ continue;
+ }
+
+ /* We need to make sure the backing filename we are comparing against
+ * is relative to the current image filename (or absolute) */
+ path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+ curr_bs->backing_file);
+
+ if (!realpath(filename_tmp, backing_file_full)) {
+ continue;
+ }
+
+ if (strcmp(backing_file_full, filename_full) == 0) {
+ retval = curr_bs->backing_hd;
+ break;
+ }
+ }
+ }
+
+ g_free(filename_full);
+ g_free(backing_file_full);
+ g_free(filename_tmp);
+ return retval;
+}
+
+/*
+ * Count the backing images below 'bs'. Counting stops at the first image
+ * that has no driver or no backing image, so an image without a driver
+ * contributes 0.
+ */
+int bdrv_get_backing_file_depth(BlockDriverState *bs)
+{
+    int depth = 0;
+
+    while (bs->drv && bs->backing_hd) {
+        depth++;
+        bs = bs->backing_hd;
+    }
+
+    return depth;
+}
+
+/*
+ * Follow backing_hd links from 'bs' and return the last image of the chain
+ * (the one without a backing image). Returns NULL when 'bs' is NULL.
+ */
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+    BlockDriverState *base = bs;
+
+    while (base && base->backing_hd) {
+        base = base->backing_hd;
+    }
+
+    return base;
+}
+
+/**************************************************************/
+/* async I/Os */
+
+/*
+ * Submit an asynchronous vectored read. Traces the request and hands it to
+ * bdrv_co_aio_rw_vector(); 'cb' and 'opaque' are passed through to it.
+ */
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+                                 QEMUIOVector *qiov, int nb_sectors,
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    const bool is_write = false;
+
+    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+                                 cb, opaque, is_write);
+}
+
+/*
+ * Submit an asynchronous vectored write. Traces the request and hands it to
+ * bdrv_co_aio_rw_vector(); 'cb' and 'opaque' are passed through to it.
+ */
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+                                  QEMUIOVector *qiov, int nb_sectors,
+                                  BlockDriverCompletionFunc *cb, void *opaque)
+{
+    const bool is_write = true;
+
+    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
+
+    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
+                                 cb, opaque, is_write);
+}
+
+
+/*
+ * Shared completion state for one bdrv_aio_multiwrite() call. Allocated with
+ * one callbacks[] slot per original request and freed by multiwrite_cb()
+ * when the last submitted request completes.
+ */
+typedef struct MultiwriteCB {
+ /* first negative result seen by multiwrite_cb(), 0 if none */
+ int error;
+ /* submitted (post-merge) requests still outstanding */
+ int num_requests;
+ /* number of entries in callbacks[] */
+ int num_callbacks;
+ struct {
+ /* caller's completion callback and its argument */
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+ /* merged vector created by multiwrite_merge(), or NULL; destroyed and
+ * freed in multiwrite_user_cb() */
+ QEMUIOVector *free_qiov;
+ } callbacks[];
+} MultiwriteCB;
+
+/*
+ * Invoke every original request's completion callback with the aggregate
+ * error, releasing the merged vector attached to each slot (if any) right
+ * after its callback has run.
+ */
+static void multiwrite_user_cb(MultiwriteCB *mcb)
+{
+    int idx;
+
+    for (idx = 0; idx < mcb->num_callbacks; idx++) {
+        QEMUIOVector *merged;
+
+        mcb->callbacks[idx].cb(mcb->callbacks[idx].opaque, mcb->error);
+
+        merged = mcb->callbacks[idx].free_qiov;
+        if (merged) {
+            qemu_iovec_destroy(merged);
+        }
+        g_free(merged);
+    }
+}
+
+/*
+ * Completion callback for each submitted multiwrite request. Remembers the
+ * first negative result; when the last outstanding request finishes it runs
+ * the user callbacks and frees the MultiwriteCB.
+ */
+static void multiwrite_cb(void *opaque, int ret)
+{
+    MultiwriteCB *mcb = opaque;
+
+    trace_multiwrite_cb(mcb, ret);
+
+    if (!mcb->error && ret < 0) {
+        mcb->error = ret;
+    }
+
+    mcb->num_requests--;
+    if (mcb->num_requests != 0) {
+        return;
+    }
+
+    multiwrite_user_cb(mcb);
+    g_free(mcb);
+}
+
+/*
+ * qsort() comparator ordering BlockRequests by ascending start sector.
+ * Returns 1, -1 or 0.
+ */
+static int multiwrite_req_compare(const void *a, const void *b)
+{
+    const BlockRequest *lhs = a;
+    const BlockRequest *rhs = b;
+
+    /*
+     * Compare instead of subtracting: the difference of two sector numbers
+     * could overflow the int return value.
+     */
+    return (lhs->sector > rhs->sector) - (lhs->sector < rhs->sector);
+}
+
+/*
+ * Takes a bunch of requests and tries to merge them. Returns the number of
+ * requests that remain after merging.
+ *
+ * reqs[] is sorted by start sector and compacted in place: entries
+ * 0 .. (return value - 1) are the requests to submit. Each merged vector is
+ * recorded in mcb->callbacks[i].free_qiov so that it can be released later.
+ */
+static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs, MultiwriteCB *mcb)
+{
+ int i, outidx;
+
+ // Sort requests by start sector
+ qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
+
+ // Check if a request starts at or before the end of the previous output
+ // request (sequential or overlapping). If so, combine them.
+ outidx = 0;
+ for (i = 1; i < num_reqs; i++) {
+ int merge = 0;
+ // First sector past the end of the current output request
+ int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
+
+ // Handle exactly sequential writes and overlapping writes.
+ if (reqs[i].sector <= oldreq_last) {
+ merge = 1;
+ }
+
+ // Never build a vector with more than IOV_MAX elements
+ if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+ merge = 0;
+ }
+
+ if (merge) {
+ size_t size;
+ QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
+ qemu_iovec_init(qiov,
+ reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
+
+ // Add the first request to the merged one. If the requests are
+ // overlapping, drop the last sectors of the first request.
+ // (<< 9 converts a sector count to bytes, i.e. multiplies by 512)
+ size = (reqs[i].sector - reqs[outidx].sector) << 9;
+ qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
+
+ // We shouldn't need to add any zeros between the two requests
+ assert (reqs[i].sector <= oldreq_last);
+
+ // Add the second request
+ qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
+
+ reqs[outidx].nb_sectors = qiov->size >> 9;
+ reqs[outidx].qiov = qiov;
+
+ // Remember the merged vector so multiwrite_user_cb() can destroy
+ // and free it
+ mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
+ } else {
+ // Not mergeable: this request becomes the next output request
+ outidx++;
+ reqs[outidx].sector = reqs[i].sector;
+ reqs[outidx].nb_sectors = reqs[i].nb_sectors;
+ reqs[outidx].qiov = reqs[i].qiov;
+ }
+ }
+
+ return outidx + 1;
+}
+
+/*
+ * Submit multiple AIO write requests at once.
+ *
+ * On success, the function returns 0 and all requests in the reqs array have
+ * been submitted. In error case this function returns -1, and any of the
+ * requests may or may not be submitted yet. In particular, this means that the
+ * callback will be called for some of the requests, for others it won't. The
+ * caller must check the error field of the BlockRequest to wait for the right
+ * callbacks (if error != 0, no callback will be called).
+ *
+ * The implementation may modify the contents of the reqs array, e.g. to merge
+ * requests. However, the fields opaque and error are left unmodified as they
+ * are used to signal failure for a single request to the caller.
+ */
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+{
+    MultiwriteCB *mcb;
+    int idx;
+
+    /* don't submit writes if we don't have a medium */
+    if (!bs->drv) {
+        for (idx = 0; idx < num_reqs; idx++) {
+            reqs[idx].error = -ENOMEDIUM;
+        }
+        return -1;
+    }
+
+    if (num_reqs == 0) {
+        return 0;
+    }
+
+    /* One zero-initialised control block with a callback slot per request */
+    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
+    mcb->num_requests = 0;
+    mcb->num_callbacks = num_reqs;
+
+    for (idx = 0; idx < num_reqs; idx++) {
+        mcb->callbacks[idx].cb = reqs[idx].cb;
+        mcb->callbacks[idx].opaque = reqs[idx].opaque;
+    }
+
+    /* Merge what can be merged; num_reqs shrinks to the remaining count */
+    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+
+    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+
+    /* Run the aio requests. */
+    mcb->num_requests = num_reqs;
+    for (idx = 0; idx < num_reqs; idx++) {
+        bdrv_aio_writev(bs, reqs[idx].sector, reqs[idx].qiov,
+                        reqs[idx].nb_sectors, multiwrite_cb, mcb);
+    }
+
+    return 0;
+}
+
+/*
+ * Cancel an in-flight asynchronous request by calling the cancel hook of
+ * the request's aiocb_info.
+ */
+void bdrv_aio_cancel(BlockDriverAIOCB *acb)
+{
+    (acb->aiocb_info->cancel)(acb);
+}
+
+/* block I/O throttling */
+/*
+ * Check whether submitting 'nb_sectors' more sectors would exceed the
+ * bytes-per-second limit for the current slice.
+ *
+ * The total limit (bps[BLOCK_IO_LIMIT_TOTAL]) takes precedence over the
+ * per-direction limit (bps[is_write]); with neither set nothing is limited.
+ *
+ * Returns false when the request fits, storing 0 in *wait. Returns true when
+ * it does not fit; then bs->slice_end is extended and *wait receives the
+ * estimated delay (wait_time scaled by NANOSECONDS_PER_SECOND). 'wait' may
+ * be NULL. 'elapsed_time' is in the same unit as wait_time, i.e. seconds.
+ */
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait)
+{
+ uint64_t bps_limit = 0;
+ uint64_t extension;
+ double bytes_limit, bytes_base, bytes_res;
+ double slice_time, wait_time;
+
+ if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+ bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+ } else if (bs->io_limits.bps[is_write]) {
+ bps_limit = bs->io_limits.bps[is_write];
+ } else {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ /* Slice length in seconds and the byte budget for that slice */
+ slice_time = bs->slice_end - bs->slice_start;
+ slice_time /= (NANOSECONDS_PER_SECOND);
+ bytes_limit = bps_limit * slice_time;
+ bytes_base = bs->slice_submitted.bytes[is_write];
+ /* A total limit counts the traffic of both directions */
+ if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+ bytes_base += bs->slice_submitted.bytes[!is_write];
+ }
+
+ /* bytes_base: the bytes of data which have been read/written; and
+ * it is obtained from the history statistic info.
+ * bytes_res: the remaining bytes of data which need to be read/written.
+ * (bytes_base + bytes_res) / bps_limit: used to calculate
+ * the total time for completing reading/writing all data.
+ */
+ bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+
+ if (bytes_base + bytes_res <= bytes_limit) {
+ if (wait) {
+ *wait = 0;
+ }
+
+ return false;
+ }
+
+ /* Calc approx time to dispatch */
+ wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;
+
+ /* When the I/O rate at runtime exceeds the limits,
+ * bs->slice_end need to be extended in order that the current statistic
+ * info can be kept until the timer fire, so it is increased and tuned
+ * based on the result of experiment.
+ */
+ extension = wait_time * NANOSECONDS_PER_SECOND;
+ /* Round the extension up to a whole number of BLOCK_IO_SLICE_TIME units */
+ extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) *
+ BLOCK_IO_SLICE_TIME;
+ bs->slice_end += extension;
+ if (wait) {
+ *wait = wait_time * NANOSECONDS_PER_SECOND;
+ }
+
+ return true;
+}
+
+/*
+ * Check whether one more I/O would exceed the operations-per-second limit
+ * for the current slice.
+ *
+ * The total limit takes precedence over the per-direction limit.  Returns
+ * false (storing 0 in *wait) when no iops limit is configured or the
+ * operation still fits.  Otherwise extends bs->slice_end by one
+ * BLOCK_IO_SLICE_TIME, stores the estimated delay in nanoseconds in *wait
+ * and returns true.  wait may be NULL; elapsed_time is in seconds.
+ */
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+                                    double elapsed_time, uint64_t *wait)
+{
+    uint64_t limit;
+    double allowed, submitted;
+    double slice_secs, delay;
+
+    /* Pick the limit that applies, or bail out if there is none. */
+    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
+        limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+    } else if (bs->io_limits.iops[is_write]) {
+        limit = bs->io_limits.iops[is_write];
+    } else {
+        if (wait) {
+            *wait = 0;
+        }
+        return false;
+    }
+
+    /* Operation budget of the whole slice. */
+    slice_secs = bs->slice_end - bs->slice_start;
+    slice_secs /= (NANOSECONDS_PER_SECOND);
+    allowed = limit * slice_secs;
+
+    /* Operations already accounted; a total limit counts both directions. */
+    submitted = bs->slice_submitted.ios[is_write];
+    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
+        submitted += bs->slice_submitted.ios[!is_write];
+    }
+
+    if (submitted + 1 <= allowed) {
+        if (wait) {
+            *wait = 0;
+        }
+        return false;
+    }
+
+    /* Approximate time until dispatch, in seconds, never negative. */
+    delay = (submitted + 1) / limit;
+    if (delay > elapsed_time) {
+        delay -= elapsed_time;
+    } else {
+        delay = 0;
+    }
+
+    /* The current slice is exhausted: stretch it by one more slice time. */
+    bs->slice_end += BLOCK_IO_SLICE_TIME;
+    if (wait) {
+        *wait = delay * NANOSECONDS_PER_SECOND;
+    }
+
+    return true;
+}
+
+/*
+ * Combined throttling check for one request.
+ *
+ * Starts a fresh slice when the current one has expired, then evaluates
+ * both the bps and the iops limit.  If either is exceeded, the larger of
+ * the two delays (nanoseconds) is stored in *wait, the slice is stretched
+ * to cover it and true is returned.  Otherwise the request is accounted in
+ * bs->slice_submitted, *wait is set to 0 and false is returned.  wait may
+ * be NULL.
+ */
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+                                  bool is_write, int64_t *wait)
+{
+    int64_t now, max_wait;
+    uint64_t bps_wait = 0, iops_wait = 0;
+    double elapsed;
+    int over_bps, over_iops;
+
+    now = qemu_get_clock_ns(vm_clock);
+    if (now > bs->slice_end) {
+        /* Slice expired: open a new one and forget the old statistics. */
+        bs->slice_start = now;
+        bs->slice_end = now + BLOCK_IO_SLICE_TIME;
+        memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted));
+    }
+
+    /* Seconds since the slice began. */
+    elapsed = now - bs->slice_start;
+    elapsed /= (NANOSECONDS_PER_SECOND);
+
+    /* Both checks always run; each may extend bs->slice_end. */
+    over_bps = bdrv_exceed_bps_limits(bs, nb_sectors, is_write, elapsed,
+                                      &bps_wait);
+    over_iops = bdrv_exceed_iops_limits(bs, is_write, elapsed, &iops_wait);
+
+    if (!over_bps && !over_iops) {
+        if (wait) {
+            *wait = 0;
+        }
+
+        /* Within limits: account the request in the current slice. */
+        bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors *
+                                               BDRV_SECTOR_SIZE;
+        bs->slice_submitted.ios[is_write]++;
+
+        return false;
+    }
+
+    max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
+    if (wait) {
+        *wait = max_wait;
+    }
+
+    /* Keep the slice alive at least until the delayed request can run. */
+    now = qemu_get_clock_ns(vm_clock);
+    if (bs->slice_end < now + max_wait) {
+        bs->slice_end = now + max_wait;
+    }
+
+    return true;
+}
+
+/**************************************************************/
+/* async block device emulation */
+
+typedef struct BlockDriverAIOCBSync { /* AIOCB used by bdrv_aio_rw_vector() to emulate AIO on top of bdrv_read/bdrv_write */
+ BlockDriverAIOCB common; /* generic part filled in by qemu_aio_get() */
+ QEMUBH *bh; /* bottom half that runs bdrv_aio_bh_cb() */
+ int ret; /* return value of the driver's bdrv_read/bdrv_write call */
+ /* vector translation state */
+ QEMUIOVector *qiov; /* caller's I/O vector */
+ uint8_t *bounce; /* linear buffer of qiov->size bytes from qemu_blockalign() */
+ int is_write; /* non-zero for a write, zero for a read */
+} BlockDriverAIOCBSync;
+
+/*
+ * Cancel hook for requests created by bdrv_aio_rw_vector().
+ *
+ * The synchronous driver call has already happened by the time the AIOCB
+ * is handed out, so cancelling only has to stop the pending bottom half
+ * and release what the request owns.  The completion callback is not
+ * invoked.
+ */
+static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
+{
+    BlockDriverAIOCBSync *acb =
+        container_of(blockacb, BlockDriverAIOCBSync, common);
+
+    qemu_bh_delete(acb->bh);
+    acb->bh = NULL;
+    /*
+     * The bounce buffer is otherwise freed only by bdrv_aio_bh_cb(), which
+     * will never run now that the bottom half is gone; free it here so a
+     * cancelled request does not leak it.
+     */
+    qemu_vfree(acb->bounce);
+    acb->bounce = NULL;
+    qemu_aio_release(acb);
+}
+
+static const AIOCBInfo bdrv_em_aiocb_info = { /* AIOCB type used by bdrv_aio_rw_vector() */
+ .aiocb_size = sizeof(BlockDriverAIOCBSync), /* size allocated by qemu_aio_get() */
+ .cancel = bdrv_aio_cancel_em, /* reached through bdrv_aio_cancel() */
+};
+
+/*
+ * Bottom half that completes a request emulated by bdrv_aio_rw_vector():
+ * copies read data back into the caller's vector, frees the bounce buffer,
+ * reports the stored result to the callback and releases the AIOCB.
+ */
+static void bdrv_aio_bh_cb(void *opaque)
+{
+    BlockDriverAIOCBSync *sync_acb = opaque;
+    QEMUIOVector *vec = sync_acb->qiov;
+
+    if (!sync_acb->is_write) {
+        /* Reads landed in the bounce buffer; scatter them to the caller. */
+        qemu_iovec_from_buf(vec, 0, sync_acb->bounce, vec->size);
+    }
+    qemu_vfree(sync_acb->bounce);
+
+    sync_acb->common.cb(sync_acb->common.opaque, sync_acb->ret);
+
+    qemu_bh_delete(sync_acb->bh);
+    sync_acb->bh = NULL;
+    qemu_aio_release(sync_acb);
+}
+
+/*
+ * Emulate an asynchronous vectored read or write with the driver's
+ * synchronous bdrv_read/bdrv_write.
+ *
+ * The transfer goes through a bounce buffer of qiov->size bytes.  The
+ * driver call is made immediately; its result is delivered to cb later
+ * from the bottom half bdrv_aio_bh_cb().  Returns the generic AIOCB.
+ */
+static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            QEMUIOVector *qiov,
+                                            int nb_sectors,
+                                            BlockDriverCompletionFunc *cb,
+                                            void *opaque,
+                                            int is_write)
+{
+    BlockDriverAIOCBSync *sync_acb =
+        qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
+
+    sync_acb->is_write = is_write;
+    sync_acb->qiov = qiov;
+    sync_acb->bounce = qemu_blockalign(bs, qiov->size);
+    sync_acb->bh = qemu_bh_new(bdrv_aio_bh_cb, sync_acb);
+
+    if (!is_write) {
+        sync_acb->ret = bs->drv->bdrv_read(bs, sector_num, sync_acb->bounce,
+                                           nb_sectors);
+    } else {
+        /* Gather the caller's data into the bounce buffer first. */
+        qemu_iovec_to_buf(sync_acb->qiov, 0, sync_acb->bounce, qiov->size);
+        sync_acb->ret = bs->drv->bdrv_write(bs, sector_num, sync_acb->bounce,
+                                            nb_sectors);
+    }
+
+    /* Completion is reported from the bottom half, not from here. */
+    qemu_bh_schedule(sync_acb->bh);
+
+    return &sync_acb->common;
+}
+
+static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{ /* Read flavour of bdrv_aio_rw_vector(): forwards everything with is_write = 0. */
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{ /* Write flavour of bdrv_aio_rw_vector(): forwards everything with is_write = 1. */
+ return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+
+typedef struct BlockDriverAIOCBCoroutine { /* AIOCB for requests that are carried out inside a coroutine */
+ BlockDriverAIOCB common; /* generic part filled in by qemu_aio_get() */
+ BlockRequest req; /* sector, nb_sectors, qiov of the request; error receives the result */
+ bool is_write; /* selects bdrv_co_do_writev over bdrv_co_do_readv in bdrv_co_do_rw() */
+ bool *done; /* NULL, or a flag that bdrv_co_em_bh() sets to true (used by cancel) */
+ QEMUBH* bh; /* bottom half running bdrv_co_em_bh() */
+} BlockDriverAIOCBCoroutine;
+
+/*
+ * Cancel hook for coroutine-based AIOCBs.  The request is not aborted;
+ * instead this waits, running qemu_aio_wait(), until bdrv_co_em_bh() has
+ * completed it and flipped the flag installed in acb->done.
+ */
+static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
+{
+    bool finished = false;
+    BlockDriverAIOCBCoroutine *co_acb;
+
+    co_acb = container_of(blockacb, BlockDriverAIOCBCoroutine, common);
+    co_acb->done = &finished;
+
+    for (;;) {
+        if (finished) {
+            break;
+        }
+        qemu_aio_wait();
+    }
+}
+
+static const AIOCBInfo bdrv_em_co_aiocb_info = { /* AIOCB type of the coroutine-based read/write, flush and discard requests */
+ .aiocb_size = sizeof(BlockDriverAIOCBCoroutine), /* size allocated by qemu_aio_get() */
+ .cancel = bdrv_aio_co_cancel_em, /* waits for completion instead of aborting */
+};
+
+/*
+ * Bottom half that finishes a coroutine-based AIO request: reports
+ * req.error to the caller's callback, signals a waiter registered through
+ * acb->done (if any) and releases the bottom half and the AIOCB.
+ */
+static void bdrv_co_em_bh(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *co_acb = opaque;
+    bool *done_flag;
+
+    co_acb->common.cb(co_acb->common.opaque, co_acb->req.error);
+
+    /* Read the flag only after the callback has run, as it may be set late. */
+    done_flag = co_acb->done;
+    if (done_flag != NULL) {
+        *done_flag = true;
+    }
+
+    qemu_bh_delete(co_acb->bh);
+    qemu_aio_release(co_acb);
+}
+
+/*
+ * Coroutine entry point: run bdrv_co_do_writev or bdrv_co_do_readv for the
+ * request stored in the AIOCB, keep the result in req.error and schedule
+ * bdrv_co_em_bh() to deliver it.
+ */
+static void coroutine_fn bdrv_co_do_rw(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *co_acb = opaque;
+    BlockDriverState *bs = co_acb->common.bs;
+    BlockRequest *req = &co_acb->req;
+
+    if (co_acb->is_write) {
+        req->error = bdrv_co_do_writev(bs, req->sector, req->nb_sectors,
+                                       req->qiov, 0);
+    } else {
+        req->error = bdrv_co_do_readv(bs, req->sector, req->nb_sectors,
+                                      req->qiov, 0);
+    }
+
+    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
+    qemu_bh_schedule(co_acb->bh);
+}
+
+/*
+ * Start a vectored read or write as an AIO request backed by a coroutine.
+ * The request parameters are stored in a new AIOCB and bdrv_co_do_rw() is
+ * entered with it.  Returns the generic AIOCB.
+ */
+static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
+                                               int64_t sector_num,
+                                               QEMUIOVector *qiov,
+                                               int nb_sectors,
+                                               BlockDriverCompletionFunc *cb,
+                                               void *opaque,
+                                               bool is_write)
+{
+    BlockDriverAIOCBCoroutine *co_acb =
+        qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    Coroutine *worker;
+
+    /* No canceller is waiting on a fresh request. */
+    co_acb->done = NULL;
+    co_acb->is_write = is_write;
+    co_acb->req.qiov = qiov;
+    co_acb->req.nb_sectors = nb_sectors;
+    co_acb->req.sector = sector_num;
+
+    worker = qemu_coroutine_create(bdrv_co_do_rw);
+    qemu_coroutine_enter(worker, co_acb);
+
+    return &co_acb->common;
+}
+
+/*
+ * Coroutine entry point for bdrv_aio_flush(): flush the device, store the
+ * result in req.error and schedule bdrv_co_em_bh() to report it.
+ */
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *co_acb = opaque;
+
+    co_acb->req.error = bdrv_co_flush(co_acb->common.bs);
+
+    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
+    qemu_bh_schedule(co_acb->bh);
+}
+
+/*
+ * Start an asynchronous flush of bs.  The work is done by
+ * bdrv_aio_flush_co_entry() in a new coroutine; cb is called with opaque
+ * and the result once it has finished.  Returns the generic AIOCB.
+ */
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCBCoroutine *co_acb;
+    Coroutine *worker;
+
+    trace_bdrv_aio_flush(bs, opaque);
+
+    co_acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    co_acb->done = NULL;
+
+    worker = qemu_coroutine_create(bdrv_aio_flush_co_entry);
+    qemu_coroutine_enter(worker, co_acb);
+
+    return &co_acb->common;
+}
+
+/*
+ * Coroutine entry point for bdrv_aio_discard(): discard the sector range
+ * stored in the AIOCB, keep the result in req.error and schedule
+ * bdrv_co_em_bh() to report it.
+ */
+static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+{
+    BlockDriverAIOCBCoroutine *co_acb = opaque;
+    BlockRequest *req = &co_acb->req;
+
+    req->error = bdrv_co_discard(co_acb->common.bs, req->sector,
+                                 req->nb_sectors);
+
+    co_acb->bh = qemu_bh_new(bdrv_co_em_bh, co_acb);
+    qemu_bh_schedule(co_acb->bh);
+}
+
+/*
+ * Start an asynchronous discard of nb_sectors sectors at sector_num.  The
+ * work is done by bdrv_aio_discard_co_entry() in a new coroutine; cb is
+ * called with opaque and the result once it has finished.  Returns the
+ * generic AIOCB.
+ */
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+                                   int64_t sector_num, int nb_sectors,
+                                   BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCBCoroutine *co_acb;
+    Coroutine *worker;
+
+    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+
+    co_acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    co_acb->done = NULL;
+    co_acb->req.nb_sectors = nb_sectors;
+    co_acb->req.sector = sector_num;
+
+    worker = qemu_coroutine_create(bdrv_aio_discard_co_entry);
+    qemu_coroutine_enter(worker, co_acb);
+
+    return &co_acb->common;
+}
+
+void bdrv_init(void)
+{ /* Run the module initializers registered for MODULE_INIT_BLOCK. */
+ module_call_init(MODULE_INIT_BLOCK);
+}
+
+void bdrv_init_with_whitelist(void)
+{ /* Set use_bdrv_whitelist, then perform the regular bdrv_init(). */
+ use_bdrv_whitelist = 1;
+ bdrv_init();
+}
+
+/*
+ * Allocate an AIOCB of the size given by aiocb_info->aiocb_size and fill
+ * in its generic header.  Only the header fields are initialised; the
+ * rest of the allocation is left for the caller to set up.  Release with
+ * qemu_aio_release().
+ */
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+                   BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverAIOCB *header = g_slice_alloc(aiocb_info->aiocb_size);
+
+    header->opaque = opaque;
+    header->cb = cb;
+    header->bs = bs;
+    header->aiocb_info = aiocb_info;
+
+    return header;
+}
+
+/*
+ * Free an AIOCB obtained from qemu_aio_get().  The allocation size is
+ * looked up in the AIOCBInfo recorded in the AIOCB header.
+ */
+void qemu_aio_release(void *p)
+{
+    BlockDriverAIOCB *header = p;
+    const AIOCBInfo *info = header->aiocb_info;
+
+    g_slice_free1(info->aiocb_size, header);
+}
+
+/**************************************************************/
+/* Coroutine block device emulation */
+
+typedef struct CoroutineIOCompletion { /* hand-off between an AIO completion callback and the coroutine waiting for it */
+ Coroutine *coroutine; /* coroutine that bdrv_co_io_em_complete() re-enters */
+ int ret; /* result passed to bdrv_co_io_em_complete() */
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{ /* AIO completion callback: store ret in the CoroutineIOCompletion passed
+ * as opaque, then re-enter the coroutine recorded in it. */
+ CoroutineIOCompletion *co = opaque;
+
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine, NULL);
+}
+
+/*
+ * Emulate a coroutine read/write with the driver's AIO interface: submit
+ * the request through bdrv_aio_readv/bdrv_aio_writev, yield, and return
+ * the result once bdrv_co_io_em_complete() has re-entered this coroutine.
+ * Returns -EIO if the driver does not hand back an AIOCB.
+ */
+static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *iov,
+                                      bool is_write)
+{
+    CoroutineIOCompletion completion = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockDriverAIOCB *acb;
+
+    if (!is_write) {
+        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
+                                      bdrv_co_io_em_complete, &completion);
+    } else {
+        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
+                                       bdrv_co_io_em_complete, &completion);
+    }
+
+    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
+    if (acb == NULL) {
+        return -EIO;
+    }
+
+    /* Sleep until the completion callback wakes this coroutine up. */
+    qemu_coroutine_yield();
+
+    return completion.ret;
+}
+
+static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{ /* Read flavour of bdrv_co_io_em(): forwards with is_write = false. */
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
+}
+
+static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{ /* Write flavour of bdrv_co_io_em(): forwards with is_write = true. */
+ return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
+}
+
+static void coroutine_fn bdrv_flush_co_entry(void *opaque)
+{ /* Coroutine entry used by bdrv_flush(): flush rwco->bs and store the
+ * result in rwco->ret, which the caller polls. */
+ RwCo *rwco = opaque;
+
+ rwco->ret = bdrv_co_flush(rwco->bs);
+}
+
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{ /* Flush bs and then, recursively, bs->file.
+ *
+ * Returns 0 immediately when bs is NULL, has no medium inserted or is
+ * read-only (this also ends the recursion once bs->file is NULL).
+ * Otherwise the steps are:
+ * 1. bdrv_co_flush_to_os, if the driver has it;
+ * 2. unless BDRV_O_NO_FLUSH is set: bdrv_co_flush_to_disk, or else
+ * bdrv_aio_flush, or else nothing;
+ * 3. the same procedure for bs->file.
+ * The first negative result is returned as-is.
+ */
+ int ret;
+
+ if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ return 0;
+ }
+
+ /* Write back cached data to the OS even with cache=unsafe */
+ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
+ if (bs->drv->bdrv_co_flush_to_os) {
+ ret = bs->drv->bdrv_co_flush_to_os(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* But don't actually force it to the disk with cache=unsafe */
+ if (bs->open_flags & BDRV_O_NO_FLUSH) {
+ goto flush_parent;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+ if (bs->drv->bdrv_co_flush_to_disk) {
+ ret = bs->drv->bdrv_co_flush_to_disk(bs);
+ } else if (bs->drv->bdrv_aio_flush) {
+ BlockDriverAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ ret = -EIO;
+ } else {
+ qemu_coroutine_yield(); /* resumed by bdrv_co_io_em_complete() */
+ ret = co.ret;
+ }
+ } else {
+ /*
+ * Some block drivers always operate in either writethrough or unsafe
+ * mode and don't support bdrv_flush therefore. Usually qemu doesn't
+ * know how the server works (because the behaviour is hardcoded or
+ * depends on server-side configuration), so we can't ensure that
+ * everything is safe on disk. Returning an error doesn't work because
+ * that would break guests even if the server operates in writethrough
+ * mode.
+ *
+ * Let's hope the user knows what he's doing.
+ */
+ ret = 0;
+ }
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
+ * in the case of cache=unsafe, so there are no useless flushes.
+ */
+flush_parent:
+ return bdrv_co_flush(bs->file);
+}
+
+/*
+ * Invoke the driver's bdrv_invalidate_cache hook for bs.  Does nothing
+ * when bs has no driver or the driver does not provide the hook.
+ */
+void bdrv_invalidate_cache(BlockDriverState *bs)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (!driver || !driver->bdrv_invalidate_cache) {
+        return;
+    }
+
+    driver->bdrv_invalidate_cache(bs);
+}
+
+void bdrv_invalidate_cache_all(void)
+{ /* Call bdrv_invalidate_cache() for every entry of the bdrv_states list. */
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bdrv_invalidate_cache(bs);
+ }
+}
+
+/*
+ * Clear the BDRV_O_INCOMING flag in the open_flags of every entry of the
+ * bdrv_states list.
+ */
+void bdrv_clear_incoming_migration_all(void)
+{
+    BlockDriverState *state;
+
+    QTAILQ_FOREACH(state, &bdrv_states, list) {
+        state->open_flags &= ~(BDRV_O_INCOMING);
+    }
+}
+
+/*
+ * Synchronous flush of bs.
+ *
+ * Inside a coroutine the flush runs directly.  Otherwise a coroutine is
+ * created for it and this function keeps calling qemu_aio_wait() until
+ * the result is no longer NOT_DONE.  Returns the bdrv_co_flush() result.
+ */
+int bdrv_flush(BlockDriverState *bs)
+{
+    RwCo rwco = {
+        .bs = bs,
+        .ret = NOT_DONE,
+    };
+
+    if (!qemu_in_coroutine()) {
+        Coroutine *worker = qemu_coroutine_create(bdrv_flush_co_entry);
+
+        qemu_coroutine_enter(worker, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            qemu_aio_wait();
+        }
+    } else {
+        /* Fast-path if already in coroutine context */
+        bdrv_flush_co_entry(&rwco);
+    }
+
+    return rwco.ret;
+}
+
+static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+{ /* Coroutine entry used by bdrv_discard(): discard the range described by
+ * the RwCo and store the result in rwco->ret, which the caller polls. */
+ RwCo *rwco = opaque;
+
+ rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+}
+
+/*
+ * Discard nb_sectors sectors starting at sector_num.
+ *
+ * Returns -ENOMEDIUM without a driver, -EIO when bdrv_check_request()
+ * rejects the range and -EROFS when bs->read_only is set.  Otherwise the
+ * range is reset in the dirty bitmap (if one exists) and, provided
+ * BDRV_O_UNMAP is set, passed to the driver's bdrv_co_discard hook or,
+ * failing that, its bdrv_aio_discard hook.  Returns 0 when discarding is
+ * disabled or the driver has neither hook, and -EIO when the AIO hook
+ * returns no AIOCB.
+ */
+int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
+                                 int nb_sectors)
+{
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
+        return -EIO;
+    }
+    if (bs->read_only) {
+        return -EROFS;
+    }
+
+    if (bs->dirty_bitmap) {
+        bdrv_reset_dirty(bs, sector_num, nb_sectors);
+    }
+
+    /* Do nothing if disabled. */
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
+        return 0;
+    }
+
+    /* Prefer the native coroutine hook. */
+    if (bs->drv->bdrv_co_discard) {
+        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
+    }
+
+    /* No hook at all: report success without doing anything. */
+    if (!bs->drv->bdrv_aio_discard) {
+        return 0;
+    }
+
+    /* Fall back to the AIO hook and sleep until it completes. */
+    CoroutineIOCompletion completion = {
+        .coroutine = qemu_coroutine_self(),
+    };
+    BlockDriverAIOCB *acb = bs->drv->bdrv_aio_discard(bs, sector_num,
+                                                      nb_sectors,
+                                                      bdrv_co_io_em_complete,
+                                                      &completion);
+    if (acb == NULL) {
+        return -EIO;
+    }
+
+    qemu_coroutine_yield();
+    return completion.ret;
+}
+
+/*
+ * Synchronous discard of nb_sectors sectors starting at sector_num.
+ *
+ * Inside a coroutine the discard runs directly.  Otherwise a coroutine is
+ * created for it and this function keeps calling qemu_aio_wait() until
+ * the result is no longer NOT_DONE.  Returns the bdrv_co_discard() result.
+ */
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+{
+    RwCo rwco = {
+        .bs = bs,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .ret = NOT_DONE,
+    };
+
+    if (!qemu_in_coroutine()) {
+        Coroutine *worker = qemu_coroutine_create(bdrv_discard_co_entry);
+
+        qemu_coroutine_enter(worker, &rwco);
+        while (rwco.ret == NOT_DONE) {
+            qemu_aio_wait();
+        }
+    } else {
+        /* Fast-path if already in coroutine context */
+        bdrv_discard_co_entry(&rwco);
+    }
+
+    return rwco.ret;
+}
+
+/**************************************************************/
+/* removable device support */
+
+/**
+ * Report whether a medium is present.
+ *
+ * Returns 0 when bs has no driver, 1 when the driver has no
+ * bdrv_is_inserted hook, and otherwise whatever that hook returns.
+ */
+int bdrv_is_inserted(BlockDriverState *bs)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (driver == NULL) {
+        return 0;
+    }
+    if (driver->bdrv_is_inserted == NULL) {
+        return 1;
+    }
+
+    return driver->bdrv_is_inserted(bs);
+}
+
+/**
+ * Ask the driver whether the media changed since the last call to this
+ * function.  Returns the driver's answer, or -ENOTSUP when there is no
+ * driver or it lacks the bdrv_media_changed hook.  Most drivers don't know.
+ */
+int bdrv_media_changed(BlockDriverState *bs)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (!driver || !driver->bdrv_media_changed) {
+        return -ENOTSUP;
+    }
+
+    return driver->bdrv_media_changed(bs);
+}
+
+/**
+ * Eject the media when eject_flag is true, otherwise close the tray.
+ *
+ * The driver's bdrv_eject hook is called if it exists.  Independently of
+ * that, an eject event is emitted for every device with a non-empty
+ * device_name.
+ */
+void bdrv_eject(BlockDriverState *bs, bool eject_flag)
+{
+    BlockDriver *driver = bs->drv;
+    const bool is_named = bs->device_name[0] != '\0';
+
+    if (driver && driver->bdrv_eject) {
+        driver->bdrv_eject(bs, eject_flag);
+    }
+
+    if (is_named) {
+        bdrv_emit_qmp_eject_event(bs, eject_flag);
+    }
+}
+
+/**
+ * Lock or unlock the media (while it is locked, the user won't be able
+ * to eject it manually).  The call is traced and then forwarded to the
+ * driver's bdrv_lock_medium hook, if the driver has one.
+ */
+void bdrv_lock_medium(BlockDriverState *bs, bool locked)
+{
+    BlockDriver *driver = bs->drv;
+
+    trace_bdrv_lock_medium(bs, locked);
+
+    if (!driver || !driver->bdrv_lock_medium) {
+        return;
+    }
+
+    driver->bdrv_lock_medium(bs, locked);
+}
+
+/* needed for generic scsi interface */
+
+/*
+ * Forward an ioctl request to the driver's bdrv_ioctl hook.  Returns the
+ * hook's result, or -ENOTSUP when there is no driver or no such hook.
+ */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (!driver || !driver->bdrv_ioctl) {
+        return -ENOTSUP;
+    }
+
+    return driver->bdrv_ioctl(bs, req, buf);
+}
+
+/*
+ * Forward an asynchronous ioctl request to the driver's bdrv_aio_ioctl
+ * hook.  Returns the hook's AIOCB, or NULL when there is no driver or no
+ * such hook.
+ */
+BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+                                 unsigned long int req, void *buf,
+                                 BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (!driver || !driver->bdrv_aio_ioctl) {
+        return NULL;
+    }
+
+    return driver->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
+}
+
+void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
+{ /* Store align in bs->buffer_alignment; qemu_blockalign() and
+ * bdrv_qiov_is_aligned() read that field. No validation is done here. */
+ bs->buffer_alignment = align;
+}
+
+/*
+ * Allocate size bytes with qemu_memalign(), aligned to
+ * bs->buffer_alignment.  An alignment of 512 is used when bs is NULL or
+ * its buffer_alignment is zero.
+ */
+void *qemu_blockalign(BlockDriverState *bs, size_t size)
+{
+    if (bs && bs->buffer_alignment) {
+        return qemu_memalign(bs->buffer_alignment, size);
+    }
+
+    return qemu_memalign(512, size);
+}
+
+/*
+ * Check if all memory in this vector is sector aligned.
+ *
+ * Every iov_base in qiov must be a multiple of bs->buffer_alignment.
+ * Returns true if so (including for an empty vector), false otherwise.
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+    /*
+     * qemu_blockalign() treats a zero buffer_alignment as 512; do the same
+     * here so the modulo below can never divide by zero.
+     */
+    uintptr_t alignment = bs->buffer_alignment ? bs->buffer_alignment : 512;
+    int i;
+
+    for (i = 0; i < qiov->niov; i++) {
+        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity)
+{ /* Enable dirty tracking (granularity != 0) by allocating bs->dirty_bitmap,
+ * or disable it (granularity == 0) by freeing the bitmap if one exists.
+ *
+ * granularity must be zero or a power of two (asserted), and enabling
+ * while a bitmap already exists trips an assertion.
+ * NOTE(review): granularity is shifted right by BDRV_SECTOR_BITS before
+ * use, so it is presumably given in bytes and expected to be at least one
+ * sector - confirm against callers.
+ * NOTE(review): the bdrv_getlength() result is used unchecked; confirm it
+ * cannot be an error value here.
+ */
+ int64_t bitmap_size;
+
+ assert((granularity & (granularity - 1)) == 0);
+
+ if (granularity) {
+ granularity >>= BDRV_SECTOR_BITS;
+ assert(!bs->dirty_bitmap);
+ bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); /* device length shifted to sectors */
+ bs->dirty_bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); /* ffs() - 1 == log2 of the power of two */
+ } else {
+ if (bs->dirty_bitmap) {
+ hbitmap_free(bs->dirty_bitmap);
+ bs->dirty_bitmap = NULL;
+ }
+ }
+}
+
+/*
+ * Return the dirty-bitmap state of sector as reported by hbitmap_get(),
+ * or 0 when bs has no dirty bitmap.
+ */
+int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
+{
+    if (!bs->dirty_bitmap) {
+        return 0;
+    }
+
+    return hbitmap_get(bs->dirty_bitmap, sector);
+}
+
+void bdrv_dirty_iter_init(BlockDriverState *bs, HBitmapIter *hbi)
+{ /* Initialise hbi over bs->dirty_bitmap, starting at position 0. The
+ * bitmap pointer is passed on without a NULL check. */
+ hbitmap_iter_init(hbi, bs->dirty_bitmap, 0);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{ /* Set nr_sectors entries starting at cur_sector in bs->dirty_bitmap. The
+ * bitmap pointer is passed on without a NULL check. */
+ hbitmap_set(bs->dirty_bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{ /* Reset nr_sectors entries starting at cur_sector in bs->dirty_bitmap.
+ * The bitmap pointer is passed on without a NULL check. */
+ hbitmap_reset(bs->dirty_bitmap, cur_sector, nr_sectors);
+}
+
+/*
+ * Return hbitmap_count() of the dirty bitmap of bs, or 0 when bs has no
+ * dirty bitmap.
+ */
+int64_t bdrv_get_dirty_count(BlockDriverState *bs)
+{
+    if (!bs->dirty_bitmap) {
+        return 0;
+    }
+
+    return hbitmap_count(bs->dirty_bitmap);
+}
+
+void bdrv_set_in_use(BlockDriverState *bs, int in_use)
+{ /* Update bs->in_use. The new value must differ from the current one
+ * (asserted). */
+ assert(bs->in_use != in_use);
+ bs->in_use = in_use;
+}
+
+int bdrv_in_use(BlockDriverState *bs)
+{ /* Return the flag stored by bdrv_set_in_use(). */
+ return bs->in_use;
+}
+
+void bdrv_iostatus_enable(BlockDriverState *bs)
+{ /* Turn I/O status tracking on and start from BLOCK_DEVICE_IO_STATUS_OK. */
+ bs->iostatus_enabled = true;
+ bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/*
+ * The I/O status is only enabled if the drive explicitly enables it _and_
+ * the VM is configured to stop on errors: on_write_error must be ENOSPC or
+ * STOP, or on_read_error must be STOP.
+ */
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
+{
+    if (!bs->iostatus_enabled) {
+        return false;
+    }
+
+    return bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+           bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
+           bs->on_read_error == BLOCKDEV_ON_ERROR_STOP;
+}
+
+void bdrv_iostatus_disable(BlockDriverState *bs)
+{ /* Turn I/O status tracking off; bs->iostatus itself is left untouched. */
+ bs->iostatus_enabled = false;
+}
+
+/*
+ * Reset the I/O status of bs to BLOCK_DEVICE_IO_STATUS_OK and, if a block
+ * job is attached, reset the job's I/O status too.  Does nothing while
+ * I/O status tracking is not enabled.
+ */
+void bdrv_iostatus_reset(BlockDriverState *bs)
+{
+    if (!bdrv_iostatus_is_enabled(bs)) {
+        return;
+    }
+
+    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+    if (bs->job) {
+        block_job_iostatus_reset(bs->job);
+    }
+}
+
+/*
+ * Record an I/O error in bs->iostatus.  Only the first error sticks: the
+ * status changes only while it is still BLOCK_DEVICE_IO_STATUS_OK.
+ * ENOSPC maps to NOSPACE, every other error to FAILED.  I/O status
+ * tracking must be enabled (asserted).
+ */
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
+{
+    assert(bdrv_iostatus_is_enabled(bs));
+
+    if (bs->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
+        return;
+    }
+
+    if (error == ENOSPC) {
+        bs->iostatus = BLOCK_DEVICE_IO_STATUS_NOSPACE;
+    } else {
+        bs->iostatus = BLOCK_DEVICE_IO_STATUS_FAILED;
+    }
+}
+
+/*
+ * Begin accounting one I/O operation: remember its size, type and start
+ * time in cookie.  type must be below BDRV_MAX_IOTYPE (asserted).  bs is
+ * not used here; the totals are updated by bdrv_acct_done().
+ */
+void
+bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
+                enum BlockAcctType type)
+{
+    assert(type < BDRV_MAX_IOTYPE);
+
+    cookie->type = type;
+    cookie->bytes = bytes;
+    cookie->start_time_ns = get_clock();
+}
+
+/*
+ * Finish accounting the operation described by cookie: add its bytes, one
+ * operation and the time elapsed since bdrv_acct_start() to the per-type
+ * totals in bs.  cookie->type must be below BDRV_MAX_IOTYPE (asserted).
+ */
+void
+bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
+{
+    assert(cookie->type < BDRV_MAX_IOTYPE);
+
+    bs->nr_bytes[cookie->type] += cookie->bytes;
+    bs->nr_ops[cookie->type] += 1;
+    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
+}
+
+/*
+ * Create a new image file.
+ *
+ * filename       image to create; also used to look up the protocol driver
+ * fmt            name of the image format driver
+ * base_filename  backing file to record, or NULL
+ * base_fmt       format of the backing file, or NULL
+ * options        "-o" style option string, or NULL
+ * img_size       default for the size option; the option string may
+ *                override it
+ * flags          open flags used when the backing file has to be opened
+ * errp           receives a description of any failure
+ * quiet          suppresses the "Formatting ..." line on stdout
+ *
+ * If the resulting size option is -1 and a backing file is set, the
+ * backing file is opened and its size is used instead; without a backing
+ * file that is an error.  Nothing is returned: callers detect failure
+ * through errp.
+ */
+void bdrv_img_create(const char *filename, const char *fmt,
+                     const char *base_filename, const char *base_fmt,
+                     char *options, uint64_t img_size, int flags,
+                     Error **errp, bool quiet)
+{
+    QEMUOptionParameter *param = NULL, *create_options = NULL;
+    QEMUOptionParameter *backing_fmt, *backing_file, *size;
+    BlockDriverState *bs = NULL;
+    BlockDriver *drv, *proto_drv;
+    BlockDriver *backing_drv = NULL;
+    int ret = 0;
+
+    /* Find driver and parse its options */
+    drv = bdrv_find_format(fmt);
+    if (!drv) {
+        error_setg(errp, "Unknown file format '%s'", fmt);
+        return;
+    }
+
+    proto_drv = bdrv_find_protocol(filename, true);
+    if (!proto_drv) {
+        error_setg(errp, "Unknown protocol '%s'", filename);
+        return;
+    }
+
+    /* The accepted options are those of the format plus the protocol. */
+    create_options = append_option_parameters(create_options,
+                                              drv->create_options);
+    create_options = append_option_parameters(create_options,
+                                              proto_drv->create_options);
+
+    /* Create parameter list with default values */
+    param = parse_option_parameters("", create_options, param);
+
+    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
+
+    /* Parse -o options */
+    if (options) {
+        param = parse_option_parameters(options, create_options, param);
+        if (param == NULL) {
+            error_setg(errp, "Invalid options for file format '%s'.", fmt);
+            goto out;
+        }
+    }
+
+    if (base_filename) {
+        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
+                                 base_filename)) {
+            error_setg(errp, "Backing file not supported for file format '%s'",
+                       fmt);
+            goto out;
+        }
+    }
+
+    if (base_fmt) {
+        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
+            error_setg(errp, "Backing file format not supported for file "
+                             "format '%s'", fmt);
+            goto out;
+        }
+    }
+
+    /* An image must not be its own backing file. */
+    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
+    if (backing_file && backing_file->value.s) {
+        if (!strcmp(filename, backing_file->value.s)) {
+            error_setg(errp, "Error: Trying to create an image with the "
+                             "same filename as the backing file");
+            goto out;
+        }
+    }
+
+    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
+    if (backing_fmt && backing_fmt->value.s) {
+        backing_drv = bdrv_find_format(backing_fmt->value.s);
+        if (!backing_drv) {
+            error_setg(errp, "Unknown backing file format '%s'",
+                       backing_fmt->value.s);
+            goto out;
+        }
+    }
+
+    /*
+     * The size for the image must always be specified, with one exception:
+     * If we are using a backing file, we can obtain the size from there
+     */
+    size = get_option_parameter(param, BLOCK_OPT_SIZE);
+    if (size && size->value.n == -1) {
+        if (backing_file && backing_file->value.s) {
+            /*
+             * Named so that it no longer shadows the option pointer `size`
+             * above, and printed below with the unsigned specifier that
+             * matches its type.
+             */
+            uint64_t backing_size;
+            char buf[32];
+            int back_flags;
+
+            /* backing files always opened read-only */
+            back_flags =
+                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+
+            bs = bdrv_new("");
+
+            ret = bdrv_open(bs, backing_file->value.s, NULL, back_flags,
+                            backing_drv);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "Could not open '%s'",
+                                 backing_file->value.s);
+                goto out;
+            }
+            /* bdrv_get_geometry() stores a count; scale it by 512 to bytes. */
+            bdrv_get_geometry(bs, &backing_size);
+            backing_size *= 512;
+
+            snprintf(buf, sizeof(buf), "%" PRIu64, backing_size);
+            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
+        } else {
+            error_setg(errp, "Image creation needs a size parameter");
+            goto out;
+        }
+    }
+
+    if (!quiet) {
+        printf("Formatting '%s', fmt=%s ", filename, fmt);
+        print_option_parameters(param);
+        puts("");
+    }
+    ret = bdrv_create(drv, filename, param);
+    if (ret < 0) {
+        if (ret == -ENOTSUP) {
+            error_setg(errp, "Formatting or formatting option not supported for "
+                             "file format '%s'", fmt);
+        } else if (ret == -EFBIG) {
+            const char *cluster_size_hint = "";
+            if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) {
+                cluster_size_hint = " (try using a larger cluster size)";
+            }
+            error_setg(errp, "The image size is too large for file format '%s'%s",
+                       fmt, cluster_size_hint);
+        } else {
+            error_setg(errp, "%s: error while creating %s: %s", filename, fmt,
+                       strerror(-ret));
+        }
+    }
+
+out:
+    /* Common cleanup for both the success and the failure paths. */
+    free_option_parameters(create_options);
+    free_option_parameters(param);
+
+    if (bs) {
+        bdrv_delete(bs);
+    }
+}
+
+AioContext *bdrv_get_aio_context(BlockDriverState *bs)
+{
+ /* Currently BlockDriverState always uses the main loop AioContext,
+ * so bs is not consulted and qemu_get_aio_context() is returned. */
+ return qemu_get_aio_context();
+}
+
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+ NotifierWithReturn *notifier)
+{ /* Add notifier to the before_write_notifiers list of bs. */
+ notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
+}
diff --git a/contrib/qemu/block/qcow.c b/contrib/qemu/block/qcow.c
new file mode 100644
index 000000000..5239bd68f
--- /dev/null
+++ b/contrib/qemu/block/qcow.c
@@ -0,0 +1,914 @@
+/*
+ * Block driver for the QCOW format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include <zlib.h>
+#include "qemu/aes.h"
+#include "migration/migration.h"
+
+/**************************************************************/
+/* QEMU COW block driver with compression and encryption support */
+
+/* Value expected in the header's magic field: 'Q', 'F', 'I', 0xfb */
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+/* The only header version qcow_probe() and qcow_open() accept */
+#define QCOW_VERSION 1
+
+/* Values of the header's crypt_method field */
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+/* Flag in an L2 entry: the entry describes a compressed cluster.
+ * NOTE(review): 1LL << 63 shifts into the sign bit of a signed type;
+ * confirm the compilers in use treat this as intended. */
+#define QCOW_OFLAG_COMPRESSED (1LL << 63)
+
+/*
+ * Image header as read from offset 0 of the image file by qcow_open().
+ * The multi-byte fields are stored big-endian and are byte-swapped by
+ * qcow_open() before use.
+ */
+typedef struct QCowHeader {
+ uint32_t magic; /* must equal QCOW_MAGIC */
+ uint32_t version; /* must equal QCOW_VERSION */
+ uint64_t backing_file_offset; /* file offset of the backing file name, 0 if none */
+ uint32_t backing_file_size; /* length of the backing file name in bytes */
+ uint32_t mtime; /* not referenced by the code in this file */
+ uint64_t size; /* in bytes */
+ uint8_t cluster_bits; /* cluster size is 1 << cluster_bits bytes */
+ uint8_t l2_bits; /* an L2 table holds 1 << l2_bits entries */
+ uint32_t crypt_method; /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
+ uint64_t l1_table_offset; /* file offset of the L1 table */
+} QCowHeader;
+
+/* Number of L2 tables kept in BDRVQcowState.l2_cache */
+#define L2_CACHE_SIZE 16
+
+/*
+ * Per-image driver state (bs->opaque), filled in by qcow_open().
+ */
+typedef struct BDRVQcowState {
+ int cluster_bits; /* copied from the header */
+ int cluster_size; /* 1 << cluster_bits, in bytes */
+ int cluster_sectors; /* cluster size in 512-byte sectors */
+ int l2_bits; /* copied from the header */
+ int l2_size; /* 1 << l2_bits, entries per L2 table */
+ int l1_size; /* number of L1 entries */
+ /* (1 << (63 - cluster_bits)) - 1: extracts the file offset from a
+ * compressed L2 entry */
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset; /* file offset of the L1 table */
+ uint64_t *l1_table; /* L1 table, converted to host byte order */
+ /* L2_CACHE_SIZE tables of l2_size entries each, stored back to back;
+ * entries stay big-endian and are converted on access */
+ uint64_t *l2_cache;
+ uint64_t l2_cache_offsets[L2_CACHE_SIZE]; /* file offset of each cached table */
+ uint32_t l2_cache_counts[L2_CACHE_SIZE]; /* hit counters used to pick a slot to reuse */
+ uint8_t *cluster_cache; /* last decompressed cluster (cluster_size bytes) */
+ uint8_t *cluster_data; /* scratch buffer of cluster_size bytes */
+ uint64_t cluster_cache_offset; /* file offset cluster_cache was read from, -1 if invalid */
+ uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ uint32_t crypt_method_header; /* crypt method recorded in the header */
+ AES_KEY aes_encrypt_key; /* set by qcow_set_key() */
+ AES_KEY aes_decrypt_key; /* set by qcow_set_key() */
+ CoMutex lock; /* held around metadata lookups in the I/O paths */
+ Error *migration_blocker; /* added in qcow_open(), removed in qcow_close() */
+} BDRVQcowState;
+
+/* defined further down; needed earlier by get_cluster_offset() */
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+
+/*
+ * Format probe: score @buf as a possible QCOW version 1 header.
+ *
+ * Returns 100 when the buffer is at least sizeof(QCowHeader) bytes long and
+ * its big-endian magic and version fields match QCOW_MAGIC / QCOW_VERSION,
+ * and 0 otherwise.  @filename is not used.
+ */
+static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const QCowHeader *hdr = (const void *)buf;
+
+    if (buf_size < sizeof(QCowHeader)) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->magic) != QCOW_MAGIC) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->version) != QCOW_VERSION) {
+        return 0;
+    }
+    return 100;
+}
+
+/*
+ * Open a QCOW version 1 image whose protocol layer is bs->file.
+ *
+ * Reads and validates the header, loads the L1 table, allocates the L2 cache
+ * and the cluster buffers, reads the backing file name (if any) and installs
+ * a migration blocker.  @options and @flags are not used.
+ *
+ * The header comes from the image file and is therefore untrusted: every
+ * field that feeds a shift width or an allocation size is range-checked
+ * before it is used.
+ *
+ * Returns 0 on success and a negative errno value on failure:
+ * -EMEDIUMTYPE for a wrong magic, -ENOTSUP for an unknown version, -EINVAL
+ * for out-of-range header fields, or the error returned by bdrv_pread().
+ */
+static int qcow_open(BlockDriverState *bs, QDict *options, int flags)
+{
+    BDRVQcowState *s = bs->opaque;
+    int len, i, shift, ret;
+    uint64_t l1_entries;
+    QCowHeader header;
+
+    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    if (ret < 0) {
+        goto fail;
+    }
+    be32_to_cpus(&header.magic);
+    be32_to_cpus(&header.version);
+    be64_to_cpus(&header.backing_file_offset);
+    be32_to_cpus(&header.backing_file_size);
+    be32_to_cpus(&header.mtime);
+    be64_to_cpus(&header.size);
+    be32_to_cpus(&header.crypt_method);
+    be64_to_cpus(&header.l1_table_offset);
+
+    if (header.magic != QCOW_MAGIC) {
+        ret = -EMEDIUMTYPE;
+        goto fail;
+    }
+    if (header.version != QCOW_VERSION) {
+        char version[64];
+        snprintf(version, sizeof(version), "QCOW version %d", header.version);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+                      bs->device_name, "qcow", version);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    if (header.size <= 1) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    /* Cluster size must be between 512 bytes and 64 KB; larger values would
+     * make the shifts and cluster buffer allocations below overflow. */
+    if (header.cluster_bits < 9 || header.cluster_bits > 16) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    /* l2_bits counts 8-byte entries, so an L2 table occupies
+     * 1 << (l2_bits + 3) bytes; keep it between 512 bytes and 64 KB. */
+    if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    if (header.crypt_method > QCOW_CRYPT_AES) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    s->crypt_method_header = header.crypt_method;
+    if (s->crypt_method_header) {
+        bs->encrypted = 1;
+    }
+    s->cluster_bits = header.cluster_bits;
+    s->cluster_size = 1 << s->cluster_bits;
+    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+    s->l2_bits = header.l2_bits;
+    s->l2_size = 1 << s->l2_bits;
+    bs->total_sectors = header.size / 512;
+    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
+
+    /* read the level 1 table */
+    shift = s->cluster_bits + s->l2_bits;
+    /* the rounding addition below must not wrap around */
+    if (header.size > UINT64_MAX - (1LL << shift)) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    l1_entries = (header.size + (1LL << shift) - 1) >> shift;
+    /* l1_size is an int and is multiplied by sizeof(uint64_t) below */
+    if (l1_entries > INT_MAX / sizeof(uint64_t)) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    s->l1_size = l1_entries;
+
+    s->l1_table_offset = header.l1_table_offset;
+    s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+
+    ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+                     s->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    for (i = 0; i < s->l1_size; i++) {
+        be64_to_cpus(&s->l1_table[i]);
+    }
+    /* alloc L2 cache */
+    s->l2_cache = g_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+    s->cluster_cache = g_malloc(s->cluster_size);
+    s->cluster_data = g_malloc(s->cluster_size);
+    s->cluster_cache_offset = -1;
+
+    /* read the backing file name */
+    if (header.backing_file_offset != 0) {
+        len = header.backing_file_size;
+        /* truncate so that the name plus its terminator fits the buffer */
+        if (len > 1023) {
+            len = 1023;
+        }
+        ret = bdrv_pread(bs->file, header.backing_file_offset,
+                         bs->backing_file, len);
+        if (ret < 0) {
+            goto fail;
+        }
+        bs->backing_file[len] = '\0';
+    }
+
+    /* Disable migration when qcow images are used */
+    error_set(&s->migration_blocker,
+              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              "qcow", bs->device_name, "live migration");
+    migrate_add_blocker(s->migration_blocker);
+
+    qemu_co_mutex_init(&s->lock);
+    return 0;
+
+ fail:
+    g_free(s->l1_table);
+    g_free(s->l2_cache);
+    g_free(s->cluster_cache);
+    g_free(s->cluster_data);
+    return ret;
+}
+
+
+/* We have nothing to do for QCOW reopen, stubs just return
+ * success.
+ *
+ * None of the arguments is examined; the function always returns 0. */
+static int qcow_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+/*
+ * Install the AES key for an encrypted image.
+ *
+ * At most the first 16 bytes of @key are used; a shorter key is zero-padded
+ * to 16 bytes.  The current crypt method is switched to the one recorded in
+ * the image header before the key schedules are computed.
+ *
+ * Returns 0 on success, -1 if either key schedule could not be set up.
+ */
+static int qcow_set_key(BlockDriverState *bs, const char *key)
+{
+    BDRVQcowState *state = bs->opaque;
+    uint8_t padded_key[16];
+    int key_len;
+
+    memset(padded_key, 0, 16);
+    key_len = strlen(key);
+    if (key_len > 16) {
+        key_len = 16;
+    }
+    /* XXX: we could compress the chars to 7 bits to increase
+       entropy */
+    memcpy(padded_key, key, key_len);
+
+    state->crypt_method = state->crypt_method_header;
+
+    if (AES_set_encrypt_key(padded_key, 128, &state->aes_encrypt_key) != 0 ||
+        AES_set_decrypt_key(padded_key, 128, &state->aes_decrypt_key) != 0) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Encrypt (@enc != 0) or decrypt (@enc == 0) @nb_sectors 512-byte sectors
+ * from @in_buf into @out_buf with AES-CBC.
+ *
+ * Each sector is processed on its own, with an IV made of the little-endian
+ * sector number followed by zeroes; @sector_num is the number of the first
+ * sector.  @s is not used.
+ *
+ * The crypt function is compatible with the linux cryptoloop
+ * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
+ * supported.
+ */
+static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+                            uint8_t *out_buf, const uint8_t *in_buf,
+                            int nb_sectors, int enc,
+                            const AES_KEY *key)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } iv;
+    int remaining;
+
+    for (remaining = nb_sectors; remaining > 0; remaining--) {
+        /* fresh IV for every sector */
+        iv.ll[0] = cpu_to_le64(sector_num);
+        iv.ll[1] = 0;
+        AES_cbc_encrypt(in_buf, out_buf, 512, key, iv.b, enc);
+
+        sector_num++;
+        in_buf += 512;
+        out_buf += 512;
+    }
+}
+
+/*
+ * Look up, and optionally allocate, the L2 entry that maps the guest byte
+ * offset 'offset'.
+ *
+ * 'allocate' is:
+ *
+ * 0 to not allocate.
+ *
+ * 1 to allocate a normal cluster (for sector indexes 'n_start' to
+ * 'n_end')
+ *
+ * 2 to allocate a compressed cluster of size
+ * 'compressed_size'. 'compressed_size' must be > 0 and <
+ * cluster_size
+ *
+ * The return value is the L2 entry in host byte order: a file offset for
+ * a normal cluster, or QCOW_OFLAG_COMPRESSED combined with the compressed
+ * size and the file offset for a compressed one.
+ *
+ * return 0 if not allocated.
+ *
+ * NOTE(review): 0 is also returned for most I/O errors, but two write
+ * failures below use 'return -1', which becomes UINT64_MAX because the
+ * return type is unsigned. qcow_co_writev() still rejects that value
+ * through its (cluster_offset & 511) check.
+ */
+static uint64_t get_cluster_offset(BlockDriverState *bs,
+ uint64_t offset, int allocate,
+ int compressed_size,
+ int n_start, int n_end)
+{
+ BDRVQcowState *s = bs->opaque;
+ int min_index, i, j, l1_index, l2_index;
+ uint64_t l2_offset, *l2_table, cluster_offset, tmp;
+ uint32_t min_count;
+ int new_l2_table;
+
+ /* NOTE(review): l1_index is not checked against s->l1_size here;
+ * callers are presumably expected to pass offsets inside the image */
+ l1_index = offset >> (s->l2_bits + s->cluster_bits);
+ l2_offset = s->l1_table[l1_index];
+ new_l2_table = 0;
+ if (!l2_offset) {
+ if (!allocate)
+ return 0;
+ /* allocate a new l2 entry */
+ /* NOTE(review): a negative return from bdrv_getlength() is not
+ * checked before being used as an offset */
+ l2_offset = bdrv_getlength(bs->file);
+ /* round to cluster size */
+ l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
+ /* update the L1 entry */
+ s->l1_table[l1_index] = l2_offset;
+ tmp = cpu_to_be64(l2_offset);
+ if (bdrv_pwrite_sync(bs->file,
+ s->l1_table_offset + l1_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
+ return 0;
+ new_l2_table = 1;
+ }
+ /* look for the L2 table in the cache */
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (l2_offset == s->l2_cache_offsets[i]) {
+ /* increment the hit count */
+ /* when a counter saturates, halve all of them so that their
+ * relative order is kept */
+ if (++s->l2_cache_counts[i] == 0xffffffff) {
+ for(j = 0; j < L2_CACHE_SIZE; j++) {
+ s->l2_cache_counts[j] >>= 1;
+ }
+ }
+ l2_table = s->l2_cache + (i << s->l2_bits);
+ goto found;
+ }
+ }
+ /* not found: load a new entry in the least used one */
+ min_index = 0;
+ min_count = 0xffffffff;
+ for(i = 0; i < L2_CACHE_SIZE; i++) {
+ if (s->l2_cache_counts[i] < min_count) {
+ min_count = s->l2_cache_counts[i];
+ min_index = i;
+ }
+ }
+ l2_table = s->l2_cache + (min_index << s->l2_bits);
+ if (new_l2_table) {
+ /* a freshly allocated table starts out empty, in memory and on disk */
+ memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+ if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t)) < 0)
+ return 0;
+ } else {
+ if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ s->l2_size * sizeof(uint64_t))
+ return 0;
+ }
+ s->l2_cache_offsets[min_index] = l2_offset;
+ s->l2_cache_counts[min_index] = 1;
+ found:
+ /* cached L2 entries are kept big-endian, convert on access */
+ l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ /* allocate when the entry is empty, or when a normal cluster is
+ * requested on top of a compressed one */
+ if (!cluster_offset ||
+ ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
+ if (!allocate)
+ return 0;
+ /* allocate a new cluster */
+ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
+ (n_end - n_start) < s->cluster_sectors) {
+ /* if the cluster is already compressed, we must
+ decompress it in the case it is not completely
+ overwritten */
+ if (decompress_cluster(bs, cluster_offset) < 0)
+ return 0;
+ cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ /* write the cluster content */
+ /* NOTE(review): -1 is converted to UINT64_MAX here, see the
+ * function comment */
+ if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+ s->cluster_size)
+ return -1;
+ } else {
+ cluster_offset = bdrv_getlength(bs->file);
+ if (allocate == 1) {
+ /* round to cluster size */
+ cluster_offset = (cluster_offset + s->cluster_size - 1) &
+ ~(s->cluster_size - 1);
+ /* NOTE(review): the result of bdrv_truncate() is ignored */
+ bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+ /* if encrypted, we must initialize the cluster
+ content which won't be written */
+ if (s->crypt_method &&
+ (n_end - n_start) < s->cluster_sectors) {
+ uint64_t start_sect;
+ start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
+ /* the second sector of cluster_data holds the zero
+ * plaintext, the first one receives the ciphertext */
+ memset(s->cluster_data + 512, 0x00, 512);
+ for(i = 0; i < s->cluster_sectors; i++) {
+ if (i < n_start || i >= n_end) {
+ encrypt_sectors(s, start_sect + i,
+ s->cluster_data,
+ s->cluster_data + 512, 1, 1,
+ &s->aes_encrypt_key);
+ if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+ s->cluster_data, 512) != 512)
+ return -1;
+ }
+ }
+ }
+ } else if (allocate == 2) {
+ /* compressed entry: flag bit, then the compressed size
+ * stored above bit (63 - cluster_bits), then the
+ * (unaligned) file offset in the low bits */
+ cluster_offset |= QCOW_OFLAG_COMPRESSED |
+ (uint64_t)compressed_size << (63 - s->cluster_bits);
+ }
+ }
+ /* update L2 table */
+ tmp = cpu_to_be64(cluster_offset);
+ l2_table[l2_index] = tmp;
+ if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
+ return 0;
+ }
+ return cluster_offset;
+}
+
+/*
+ * Report whether the cluster containing @sector_num is allocated in this
+ * image.
+ *
+ * *@pnum receives the number of sectors, starting at @sector_num and capped
+ * at @nb_sectors, that lie in the same cluster.  Returns 1 if the cluster
+ * lookup yields a non-zero offset, 0 otherwise.
+ */
+static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *pnum)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t host_offset;
+    int first_in_cluster, same_cluster;
+
+    /* lookup only, nothing is allocated */
+    qemu_co_mutex_lock(&s->lock);
+    host_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
+    qemu_co_mutex_unlock(&s->lock);
+
+    first_in_cluster = sector_num & (s->cluster_sectors - 1);
+    same_cluster = s->cluster_sectors - first_in_cluster;
+    *pnum = (same_cluster > nb_sectors) ? nb_sectors : same_cluster;
+
+    return (host_offset != 0);
+}
+
+/*
+ * Inflate the raw deflate stream in @buf (@buf_size bytes) into @out_buf.
+ *
+ * Succeeds only if inflate() ends with Z_STREAM_END or Z_BUF_ERROR and
+ * exactly @out_buf_size bytes were produced.
+ *
+ * Returns 0 on success, -1 on any failure.
+ */
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+                             const uint8_t *buf, int buf_size)
+{
+    z_stream stream;
+    int status, produced;
+
+    memset(&stream, 0, sizeof(stream));
+
+    stream.next_in = (uint8_t *)buf;
+    stream.avail_in = buf_size;
+    stream.next_out = out_buf;
+    stream.avail_out = out_buf_size;
+
+    /* negative window bits: raw deflate data without a zlib header */
+    status = inflateInit2(&stream, -12);
+    if (status != Z_OK) {
+        return -1;
+    }
+
+    status = inflate(&stream, Z_FINISH);
+    produced = stream.next_out - out_buf;
+    inflateEnd(&stream);
+
+    if (status != Z_STREAM_END && status != Z_BUF_ERROR) {
+        return -1;
+    }
+    if (produced != out_buf_size) {
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Make s->cluster_cache hold the decompressed contents of the compressed
+ * cluster described by the L2 entry @cluster_offset.
+ *
+ * Nothing is read when the cache already holds that cluster.  The compressed
+ * bytes are staged in s->cluster_data.
+ *
+ * Returns 0 on success, -1 if reading or decompressing fails.
+ */
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t file_offset = cluster_offset & s->cluster_offset_mask;
+    int compressed_len;
+
+    if (s->cluster_cache_offset == file_offset) {
+        /* already decompressed */
+        return 0;
+    }
+
+    /* the compressed size is stored above the file offset bits */
+    compressed_len = cluster_offset >> (63 - s->cluster_bits);
+    compressed_len &= (s->cluster_size - 1);
+
+    if (bdrv_pread(bs->file, file_offset, s->cluster_data,
+                   compressed_len) != compressed_len) {
+        return -1;
+    }
+    if (decompress_buffer(s->cluster_cache, s->cluster_size,
+                          s->cluster_data, compressed_len) < 0) {
+        return -1;
+    }
+
+    s->cluster_cache_offset = file_offset;
+    return 0;
+}
+
+/*
+ * Read @nb_sectors sectors starting at @sector_num into @qiov.
+ *
+ * The request is processed one cluster at a time. Unallocated clusters
+ * are read from the backing file, or zero-filled when there is none;
+ * compressed clusters are served from the decompression cache; normal
+ * clusters are read from bs->file and decrypted if a key is set.
+ *
+ * Returns 0 on success, -EIO on lookup or decompression failures, or the
+ * negative value returned by bdrv_co_readv().
+ */
+static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ int ret = 0, n;
+ uint64_t cluster_offset;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
+
+ /* a multi-element vector is read through a linear bounce buffer that
+ * is copied back into qiov at the end */
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ } else {
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (nb_sectors != 0) {
+ /* prepare next request */
+ cluster_offset = get_cluster_offset(bs, sector_num << 9,
+ 0, 0, 0, 0);
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ /* never cross a cluster boundary in one iteration */
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+
+ if (!cluster_offset) {
+ if (bs->backing_hd) {
+ /* read from the base image */
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ /* the lock is released while the read is in progress */
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ /* Note: in this case, no need to wait */
+ memset(buf, 0, 512 * n);
+ }
+ } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
+ /* add AIO support for compressed blocks ? */
+ if (decompress_cluster(bs, cluster_offset) < 0) {
+ goto fail;
+ }
+ memcpy(buf,
+ s->cluster_cache + index_in_cluster * 512, 512 * n);
+ } else {
+ /* normal clusters must start on a sector boundary */
+ if ((cluster_offset & 511) != 0) {
+ goto fail;
+ }
+ hd_iov.iov_base = (void *)buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ /* the lock is released while the read is in progress */
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ break;
+ }
+ if (s->crypt_method) {
+ /* decrypt in place */
+ encrypt_sectors(s, sector_num, buf, buf,
+ n, 0,
+ &s->aes_decrypt_key);
+ }
+ }
+ ret = 0;
+
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+
+done:
+ qemu_co_mutex_unlock(&s->lock);
+
+ /* this copy-back also runs on the error paths */
+ if (qiov->niov > 1) {
+ qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
+ qemu_vfree(orig_buf);
+ }
+
+ return ret;
+
+fail:
+ ret = -EIO;
+ goto done;
+}
+
+/*
+ * Write @nb_sectors sectors starting at @sector_num from @qiov.
+ *
+ * The request is processed one cluster at a time; a normal cluster is
+ * allocated for each part through get_cluster_offset(), and the data is
+ * encrypted into a temporary buffer first when a key is set.
+ *
+ * Returns 0 on success, -EIO if no usable cluster offset is obtained, or
+ * the negative value returned by bdrv_co_writev().
+ */
+static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ uint64_t cluster_offset;
+ const uint8_t *src_buf;
+ int ret = 0, n;
+ uint8_t *cluster_data = NULL;
+ struct iovec hd_iov;
+ QEMUIOVector hd_qiov;
+ uint8_t *buf;
+ void *orig_buf;
+
+ /* NOTE(review): this assignment happens before s->lock is taken */
+ s->cluster_cache_offset = -1; /* disable compressed cache */
+
+ /* a multi-element vector is flattened into a linear bounce buffer */
+ if (qiov->niov > 1) {
+ buf = orig_buf = qemu_blockalign(bs, qiov->size);
+ qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
+ } else {
+ orig_buf = NULL;
+ buf = (uint8_t *)qiov->iov->iov_base;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (nb_sectors != 0) {
+
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ /* never cross a cluster boundary in one iteration */
+ n = s->cluster_sectors - index_in_cluster;
+ if (n > nb_sectors) {
+ n = nb_sectors;
+ }
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
+ index_in_cluster,
+ index_in_cluster + n);
+ /* 0 means the lookup failed; an unaligned value also covers the
+ * all-ones value produced by the 'return -1' paths of
+ * get_cluster_offset() */
+ if (!cluster_offset || (cluster_offset & 511) != 0) {
+ ret = -EIO;
+ break;
+ }
+ if (s->crypt_method) {
+ /* allocated on first use, released after the loop */
+ if (!cluster_data) {
+ cluster_data = g_malloc0(s->cluster_size);
+ }
+ encrypt_sectors(s, sector_num, cluster_data, buf,
+ n, 1, &s->aes_encrypt_key);
+ src_buf = cluster_data;
+ } else {
+ src_buf = buf;
+ }
+
+ hd_iov.iov_base = (void *)src_buf;
+ hd_iov.iov_len = n * 512;
+ qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
+ /* the lock is released while the write is in progress */
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ n, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ break;
+ }
+ ret = 0;
+
+ nb_sectors -= n;
+ sector_num += n;
+ buf += n * 512;
+ }
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (qiov->niov > 1) {
+ qemu_vfree(orig_buf);
+ }
+ g_free(cluster_data);
+
+ return ret;
+}
+
+/*
+ * Release what qcow_open() set up: the L1 table, the L2 cache, both cluster
+ * buffers and the migration blocker.
+ */
+static void qcow_close(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+    Error *blocker = state->migration_blocker;
+
+    /* metadata and cluster buffers */
+    g_free(state->l1_table);
+    g_free(state->l2_cache);
+    g_free(state->cluster_cache);
+    g_free(state->cluster_data);
+
+    /* unregister the blocker before freeing it */
+    migrate_del_blocker(blocker);
+    error_free(blocker);
+}
+
+/*
+ * Create a new, empty QCOW version 1 image at @filename.
+ *
+ * Recognised @options are BLOCK_OPT_SIZE, BLOCK_OPT_BACKING_FILE and
+ * BLOCK_OPT_ENCRYPT. The file is truncated to zero length, then the
+ * header, the backing file name (if any) and a zero-filled L1 table are
+ * written.
+ *
+ * Returns 0 on success, otherwise the value of the call that failed.
+ * NOTE(review): after a bdrv_pwrite() whose return value differs from the
+ * requested length, that return value is passed back unchanged; confirm it
+ * can only be negative there.
+ */
+static int qcow_create(const char *filename, QEMUOptionParameter *options)
+{
+ int header_size, backing_filename_len, l1_size, shift, i;
+ QCowHeader header;
+ uint8_t *tmp;
+ int64_t total_size = 0;
+ const char *backing_file = NULL;
+ int flags = 0;
+ int ret;
+ BlockDriverState *qcow_bs;
+
+ /* Read out options */
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ /* kept in 512-byte sectors */
+ total_size = options->value.n / 512;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+ flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+ }
+ options++;
+ }
+
+ /* NOTE(review): 'options' was advanced by the loop above and now points
+ * at the list terminator (or is NULL), so the protocol layer sees no
+ * options - presumably intentional, confirm */
+ ret = bdrv_create_file(filename, options);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_truncate(qcow_bs, 0);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ /* header fields are stored big-endian */
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(QCOW_VERSION);
+ header.size = cpu_to_be64(total_size * 512);
+ header_size = sizeof(header);
+ backing_filename_len = 0;
+ if (backing_file) {
+ if (strcmp(backing_file, "fat:")) {
+ /* the backing file name is stored right after the header */
+ header.backing_file_offset = cpu_to_be64(header_size);
+ backing_filename_len = strlen(backing_file);
+ header.backing_file_size = cpu_to_be32(backing_filename_len);
+ header_size += backing_filename_len;
+ } else {
+ /* special backing file for vvfat */
+ backing_file = NULL;
+ }
+ header.cluster_bits = 9; /* 512 byte cluster to avoid copying
+ unmodified sectors */
+ header.l2_bits = 12; /* 32 KB L2 tables */
+ } else {
+ header.cluster_bits = 12; /* 4 KB clusters */
+ header.l2_bits = 9; /* 4 KB L2 tables */
+ }
+ /* the L1 table starts at the next 8-byte boundary */
+ header_size = (header_size + 7) & ~7;
+ shift = header.cluster_bits + header.l2_bits;
+ /* number of L1 entries needed to cover the image, rounded up */
+ l1_size = ((total_size * 512) + (1LL << shift) - 1) >> shift;
+
+ header.l1_table_offset = cpu_to_be64(header_size);
+ if (flags & BLOCK_FLAG_ENCRYPT) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+
+ /* write all the data */
+ ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+ if (ret != sizeof(header)) {
+ goto exit;
+ }
+
+ if (backing_file) {
+ ret = bdrv_pwrite(qcow_bs, sizeof(header),
+ backing_file, backing_filename_len);
+ if (ret != backing_filename_len) {
+ goto exit;
+ }
+ }
+
+ /* write the L1 table as zeroes, one sector at a time */
+ tmp = g_malloc0(BDRV_SECTOR_SIZE);
+ for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
+ BDRV_SECTOR_SIZE); i++) {
+ ret = bdrv_pwrite(qcow_bs, header_size +
+ BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ if (ret != BDRV_SECTOR_SIZE) {
+ g_free(tmp);
+ goto exit;
+ }
+ }
+
+ g_free(tmp);
+ ret = 0;
+exit:
+ bdrv_delete(qcow_bs);
+ return ret;
+}
+
+/*
+ * Drop all allocated clusters of the image.
+ *
+ * The L1 table is zeroed in memory and on disk, the image file is truncated
+ * to end right after the L1 table, and the L2 cache is reset.
+ *
+ * Returns 0 on success or the negative value returned by the failing
+ * bdrv_pwrite_sync() / bdrv_truncate() call, so that callers see the actual
+ * error instead of a bare -1.
+ */
+static int qcow_make_empty(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+    int ret;
+
+    memset(s->l1_table, 0, l1_length);
+    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+                           l1_length);
+    if (ret < 0) {
+        return ret;
+    }
+    /* everything behind the L1 table is unreferenced now */
+    ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* forget all cached L2 tables */
+    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
+    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
+    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
+
+    return 0;
+}
+
+/*
+ * Write one cluster worth of data at @sector_num in compressed form.
+ *
+ * @nb_sectors must equal the cluster size in sectors, except for a final
+ * short write that ends exactly at the end of the image, which is
+ * zero-padded to a full cluster. If the data does not shrink, it is
+ * written as a normal cluster through bdrv_write().
+ *
+ * Returns 0 on success, -EINVAL for an unsupported length or a zlib
+ * failure, -EIO if no cluster could be allocated, or the negative value of
+ * a failed write.
+ *
+ * XXX: put compressed sectors first, then all the cluster aligned
+ tables to avoid losing bytes in alignment */
+static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ BDRVQcowState *s = bs->opaque;
+ z_stream strm;
+ int ret, out_len;
+ uint8_t *out_buf;
+ uint64_t cluster_offset;
+
+ if (nb_sectors != s->cluster_sectors) {
+ ret = -EINVAL;
+
+ /* Zero-pad last write if image size is not cluster aligned */
+ if (sector_num + nb_sectors == bs->total_sectors &&
+ nb_sectors < s->cluster_sectors) {
+ uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
+ memset(pad_buf, 0, s->cluster_size);
+ memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
+ /* recurse once with a full, padded cluster */
+ ret = qcow_write_compressed(bs, sector_num,
+ pad_buf, s->cluster_sectors);
+ qemu_vfree(pad_buf);
+ }
+ return ret;
+ }
+
+ out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+
+ /* default compression level, small window, no zlib header
+ * (negative window bits select raw deflate output) */
+ memset(&strm, 0, sizeof(strm));
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -12,
+ 9, Z_DEFAULT_STRATEGY);
+ if (ret != 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* the output is limited to cluster_size bytes: anything that does not
+ * fit is not worth storing compressed */
+ strm.avail_in = s->cluster_size;
+ strm.next_in = (uint8_t *)buf;
+ strm.avail_out = s->cluster_size;
+ strm.next_out = out_buf;
+
+ ret = deflate(&strm, Z_FINISH);
+ if (ret != Z_STREAM_END && ret != Z_OK) {
+ deflateEnd(&strm);
+ ret = -EINVAL;
+ goto fail;
+ }
+ out_len = strm.next_out - out_buf;
+
+ deflateEnd(&strm);
+
+ if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+ /* could not compress: write normal cluster */
+ ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
+ out_len, 0, 0);
+ if (cluster_offset == 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ /* strip the flag and size bits to get the file offset */
+ cluster_offset &= s->cluster_offset_mask;
+ ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = 0;
+ /* also reached on success, with ret == 0 */
+fail:
+ g_free(out_buf);
+ return ret;
+}
+
+/*
+ * Fill @bdi with the image's cluster size; no other field of @bdi is
+ * touched. Always returns 0.
+ */
+static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQcowState *s = bs->opaque;
+ bdi->cluster_size = s->cluster_size;
+ return 0;
+}
+
+
+/*
+ * Creation options understood by qcow_create(); the list ends with an
+ * entry whose name is NULL.
+ */
+static QEMUOptionParameter qcow_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = OPT_FLAG,
+ .help = "Encrypt the image"
+ },
+ { NULL }
+};
+
+/*
+ * Driver description for the "qcow" format: ties the callbacks defined in
+ * this file to the block layer. Registered by bdrv_qcow_init().
+ */
+static BlockDriver bdrv_qcow = {
+ .format_name = "qcow",
+ .instance_size = sizeof(BDRVQcowState),
+ .bdrv_probe = qcow_probe,
+ .bdrv_open = qcow_open,
+ .bdrv_close = qcow_close,
+ .bdrv_reopen_prepare = qcow_reopen_prepare,
+ .bdrv_create = qcow_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+
+ /* I/O and allocation status */
+ .bdrv_co_readv = qcow_co_readv,
+ .bdrv_co_writev = qcow_co_writev,
+ .bdrv_co_is_allocated = qcow_co_is_allocated,
+
+ /* encryption, emptying, compressed writes, image info */
+ .bdrv_set_key = qcow_set_key,
+ .bdrv_make_empty = qcow_make_empty,
+ .bdrv_write_compressed = qcow_write_compressed,
+ .bdrv_get_info = qcow_get_info,
+
+ .create_options = qcow_create_options,
+};
+
+/* Register the qcow driver with the block layer. */
+static void bdrv_qcow_init(void)
+{
+ bdrv_register(&bdrv_qcow);
+}
+
+/* hook bdrv_qcow_init() into the block_init() initialisation mechanism */
+block_init(bdrv_qcow_init);
diff --git a/contrib/qemu/block/qcow2-cache.c b/contrib/qemu/block/qcow2-cache.c
new file mode 100644
index 000000000..2f3114ecc
--- /dev/null
+++ b/contrib/qemu/block/qcow2-cache.c
@@ -0,0 +1,323 @@
+/*
+ * L2/refcount table cache for the QCOW2 format
+ *
+ * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/block_int.h"
+#include "qemu-common.h"
+#include "qcow2.h"
+#include "trace.h"
+
+/* One slot of a Qcow2Cache. */
+typedef struct Qcow2CachedTable {
+ void* table; /* cluster-sized buffer allocated in qcow2_cache_create() */
+ int64_t offset; /* file offset the slot belongs to; a slot with offset 0 is never written back */
+ bool dirty; /* set by qcow2_cache_entry_mark_dirty(), cleared after write-back */
+ int cache_hits; /* usage counter consulted when choosing a slot to replace */
+ int ref; /* incremented by each get, decremented by qcow2_cache_put() */
+} Qcow2CachedTable;
+
+/* A fixed-size cache of cluster-sized tables. */
+struct Qcow2Cache {
+ Qcow2CachedTable* entries; /* array of 'size' slots */
+ struct Qcow2Cache* depends; /* cache that is flushed before an entry of this one is written, or NULL */
+ int size; /* number of slots */
+ bool depends_on_flush; /* bs->file is flushed before the next entry write-back */
+};
+
+/*
+ * Allocate a cache with @num_tables slots.
+ *
+ * The cache and its slot array are zero-initialised; every slot gets a
+ * buffer of one cluster obtained from qemu_blockalign().
+ */
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
+{
+    BDRVQcowState *s = bs->opaque;
+    Qcow2Cache *cache = g_malloc0(sizeof(*cache));
+    int slot;
+
+    cache->size = num_tables;
+    cache->entries = g_malloc0(sizeof(*cache->entries) * num_tables);
+
+    slot = 0;
+    while (slot < cache->size) {
+        cache->entries[slot].table = qemu_blockalign(bs, s->cluster_size);
+        slot++;
+    }
+
+    return cache;
+}
+
+/*
+ * Free @c together with all of its table buffers.
+ *
+ * Every slot must be unreferenced (asserted).  Nothing is written back
+ * here.  Always returns 0; @bs is not used.
+ */
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
+{
+    Qcow2CachedTable *slot = c->entries;
+    Qcow2CachedTable *end = c->entries + c->size;
+
+    for (; slot < end; slot++) {
+        assert(slot->ref == 0);
+        qemu_vfree(slot->table);
+    }
+
+    g_free(c->entries);
+    g_free(c);
+
+    return 0;
+}
+
+/*
+ * Flush the cache that @c depends on and, if that worked, clear both the
+ * dependency and the depends_on_flush flag of @c.
+ *
+ * Returns 0 on success or the negative value from qcow2_cache_flush().
+ */
+static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
+{
+    int rc = qcow2_cache_flush(bs, c->depends);
+
+    if (rc >= 0) {
+        c->depends = NULL;
+        c->depends_on_flush = false;
+        rc = 0;
+    }
+    return rc;
+}
+
+/*
+ * Write slot @i of @c back to bs->file if it is dirty and has a non-zero
+ * offset.
+ *
+ * Before the write, a pending dependency is flushed, or, when only
+ * depends_on_flush is set, bs->file is flushed.
+ *
+ * Returns 0 on success (including "nothing to do") or the negative value of
+ * the step that failed; the slot stays dirty in that case.
+ */
+static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
+{
+    BDRVQcowState *s = bs->opaque;
+    Qcow2CachedTable *slot = &c->entries[i];
+    int rc = 0;
+
+    if (!(slot->dirty && slot->offset)) {
+        return 0;
+    }
+
+    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
+                                  c == s->l2_table_cache, i);
+
+    /* honour ordering requirements first */
+    if (c->depends) {
+        rc = qcow2_cache_flush_dependency(bs, c);
+    } else if (c->depends_on_flush) {
+        rc = bdrv_flush(bs->file);
+        if (rc >= 0) {
+            c->depends_on_flush = false;
+        }
+    }
+    if (rc < 0) {
+        return rc;
+    }
+
+    if (c == s->refcount_block_cache) {
+        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
+    } else if (c == s->l2_table_cache) {
+        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+    }
+
+    rc = bdrv_pwrite(bs->file, slot->offset, slot->table, s->cluster_size);
+    if (rc < 0) {
+        return rc;
+    }
+
+    slot->dirty = false;
+    return 0;
+}
+
+/*
+ * Write back every slot of @c and, if all of them succeeded, flush bs->file.
+ *
+ * All slots are attempted even after a failure.  The value reported is the
+ * most recent failure, except that once -ENOSPC has been recorded it is not
+ * replaced by later errors.
+ *
+ * Returns 0 on success or a negative error value.
+ */
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+{
+    BDRVQcowState *s = bs->opaque;
+    int status = 0;
+    int rc;
+    int slot;
+
+    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
+
+    for (slot = 0; slot < c->size; slot++) {
+        rc = qcow2_cache_entry_flush(bs, c, slot);
+        if (rc >= 0 || status == -ENOSPC) {
+            continue;
+        }
+        status = rc;
+    }
+
+    if (status != 0) {
+        return status;
+    }
+
+    rc = bdrv_flush(bs->file);
+    return (rc < 0) ? rc : 0;
+}
+
+/*
+ * Make @c depend on @dependency.
+ *
+ * If @dependency has a dependency of its own, that one is flushed first.
+ * If @c already depends on a different cache, that dependency is flushed
+ * before it is replaced.
+ *
+ * Returns 0 on success or the negative value of a failed flush, in which
+ * case the dependency of @c is left as it was.
+ */
+int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+    Qcow2Cache *dependency)
+{
+    int rc = 0;
+
+    if (dependency->depends != NULL) {
+        rc = qcow2_cache_flush_dependency(bs, dependency);
+    }
+    if (rc >= 0 && c->depends != NULL && c->depends != dependency) {
+        rc = qcow2_cache_flush_dependency(bs, c);
+    }
+    if (rc < 0) {
+        return rc;
+    }
+
+    c->depends = dependency;
+    return 0;
+}
+
+/*
+ * Mark @c so that bs->file is flushed before the next entry of @c is
+ * written back (see qcow2_cache_entry_flush()).
+ */
+void qcow2_cache_depends_on_flush(Qcow2Cache *c)
+{
+ c->depends_on_flush = true;
+}
+
+/*
+ * Choose the slot of @c to reuse: the unreferenced slot with the fewest
+ * cache hits (the first one on a tie).
+ *
+ * As a side effect the hit counter of every unreferenced slot is halved.
+ * Aborts if all slots are referenced.
+ */
+static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
+{
+    int victim = -1;
+    int fewest_hits = INT_MAX;
+    int slot;
+
+    for (slot = 0; slot < c->size; slot++) {
+        Qcow2CachedTable *entry = &c->entries[slot];
+
+        if (entry->ref == 0) {
+            if (entry->cache_hits < fewest_hits) {
+                victim = slot;
+                fewest_hits = entry->cache_hits;
+            }
+
+            /* Give newer hits priority */
+            /* TODO Check how to optimize the replacement strategy */
+            entry->cache_hits /= 2;
+        }
+    }
+
+    if (victim == -1) {
+        /* This can't happen in current synchronous code, but leave the check
+         * here as a reminder for whoever starts using AIO with the cache */
+        abort();
+    }
+    return victim;
+}
+
+/*
+ * Return in *@table the cached table for file offset @offset and take a
+ * reference on it.
+ *
+ * If the offset is not cached, a slot is chosen for replacement, written
+ * back if necessary, and then either filled from bs->file
+ * (@read_from_disk) or handed out with whatever contents it has.
+ *
+ * Returns 0 on success or the negative value of a failed write-back or
+ * read. On failure no reference is taken and *@table is not written.
+ */
+static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
+ uint64_t offset, void **table, bool read_from_disk)
+{
+ BDRVQcowState *s = bs->opaque;
+ int i;
+ int ret;
+
+ trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
+ offset, read_from_disk);
+
+ /* Check if the table is already cached */
+ for (i = 0; i < c->size; i++) {
+ if (c->entries[i].offset == offset) {
+ goto found;
+ }
+ }
+
+ /* If not, write a table back and replace it */
+ i = qcow2_cache_find_entry_to_replace(c);
+ trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+ if (i < 0) {
+ return i;
+ }
+
+ ret = qcow2_cache_entry_flush(bs, c, i);
+ if (ret < 0) {
+ return ret;
+ }
+
+ trace_qcow2_cache_get_read(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+ /* the slot keeps offset 0 if the read below fails */
+ c->entries[i].offset = 0;
+ if (read_from_disk) {
+ if (c == s->l2_table_cache) {
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
+ }
+
+ ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ /* Give the table some hits for the start so that it won't be replaced
+ * immediately. The number 32 is completely arbitrary. */
+ c->entries[i].cache_hits = 32;
+ c->entries[i].offset = offset;
+
+ /* And return the right table */
+found:
+ c->entries[i].cache_hits++;
+ /* released again by qcow2_cache_put() */
+ c->entries[i].ref++;
+ *table = c->entries[i].table;
+
+ trace_qcow2_cache_get_done(qemu_coroutine_self(),
+ c == s->l2_table_cache, i);
+
+ return 0;
+}
+
+/*
+ * Get the table at @offset, reading it from bs->file if it is not cached.
+ * Thin wrapper around qcow2_cache_do_get() with read_from_disk = true.
+ */
+int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table)
+{
+ return qcow2_cache_do_get(bs, c, offset, table, true);
+}
+
+/*
+ * Get a table slot for @offset without reading the table from bs->file
+ * when it is not cached.
+ * Thin wrapper around qcow2_cache_do_get() with read_from_disk = false.
+ */
+int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table)
+{
+ return qcow2_cache_do_get(bs, c, offset, table, false);
+}
+
+/*
+ * Drop the reference on the table *@table and set *@table to NULL.
+ *
+ * Returns 0 on success, -ENOENT if the table does not belong to @c (in which
+ * case *@table is left alone).  The reference count must not become negative
+ * (asserted).  @bs is not used.
+ */
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+{
+    int slot;
+
+    for (slot = 0; slot < c->size; slot++) {
+        if (c->entries[slot].table != *table) {
+            continue;
+        }
+
+        c->entries[slot].ref--;
+        *table = NULL;
+
+        assert(c->entries[slot].ref >= 0);
+        return 0;
+    }
+
+    return -ENOENT;
+}
+
+/*
+ * Mark the slot of @c that owns @table as dirty.
+ *
+ * Aborts if @table does not belong to @c.
+ */
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
+{
+    int slot;
+
+    for (slot = 0; slot < c->size; slot++) {
+        if (c->entries[slot].table == table) {
+            c->entries[slot].dirty = true;
+            return;
+        }
+    }
+
+    abort();
+}
diff --git a/contrib/qemu/block/qcow2-cluster.c b/contrib/qemu/block/qcow2-cluster.c
new file mode 100644
index 000000000..cca76d4fc
--- /dev/null
+++ b/contrib/qemu/block/qcow2-cluster.c
@@ -0,0 +1,1478 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <zlib.h>
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+#include "trace.h"
+
+/*
+ * Grows the L1 table (in memory and in the image file) so that it has room
+ * for at least min_size entries.
+ *
+ * If exact_size is true the new table gets exactly min_size entries.
+ * Otherwise the current size is enlarged in steps of roughly 1.5x until it
+ * reaches min_size, to reduce the number of times the table has to grow.
+ *
+ * The new table is written to newly allocated clusters first. Then 12 bytes
+ * starting at the header field l1_size (the new size followed by the new
+ * table offset) are rewritten, and the clusters of the old table are freed.
+ *
+ * Returns 0 on success (including when the table is already large enough),
+ * -EFBIG if the size in bytes of the requested table does not fit in an int,
+ * or the negative value returned by a failing allocation/flush/write helper.
+ */
+int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
+                        bool exact_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int new_l1_size2, ret, i;
+    uint64_t *new_l1_table;
+    int64_t new_l1_table_offset, new_l1_size;
+    uint8_t data[12];
+
+    if (min_size <= s->l1_size) {
+        return 0;
+    }
+
+    /* The table size in bytes is kept in an int (new_l1_size2), so reject
+     * requests that cannot fit before doing any arithmetic with them. This
+     * also keeps the growth loop below away from signed overflow. */
+    if (min_size > INT_MAX / sizeof(uint64_t)) {
+        return -EFBIG;
+    }
+
+    if (exact_size) {
+        new_l1_size = min_size;
+    } else {
+        /* Bump size up to reduce the number of times we have to grow */
+        new_l1_size = s->l1_size;
+        if (new_l1_size == 0) {
+            new_l1_size = 1;
+        }
+        while (min_size > new_l1_size) {
+            new_l1_size = (new_l1_size * 3 + 1) / 2;
+        }
+    }
+
+    /* The 1.5x bump may overshoot the limit even if min_size itself fits */
+    if (new_l1_size > INT_MAX / (int64_t) sizeof(uint64_t)) {
+        return -EFBIG;
+    }
+
+#ifdef DEBUG_ALLOC2
+    fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
+            s->l1_size, new_l1_size);
+#endif
+
+    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
+    new_l1_table = g_malloc0(align_offset(new_l1_size2, 512));
+    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
+
+    /* write new table (align to cluster) */
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
+    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
+    if (new_l1_table_offset < 0) {
+        g_free(new_l1_table);
+        return new_l1_table_offset;
+    }
+
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* The in-memory table is kept in CPU byte order; convert the copied
+     * entries to big endian for the write and back again afterwards. */
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
+    for (i = 0; i < s->l1_size; i++) {
+        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
+    }
+    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table,
+                           new_l1_size2);
+    if (ret < 0) {
+        goto fail;
+    }
+    for (i = 0; i < s->l1_size; i++) {
+        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
+    }
+
+    /* set new table */
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
+    cpu_to_be32w((uint32_t*)data, new_l1_size);
+    cpu_to_be64wu((uint64_t*)(data + 4), new_l1_table_offset);
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,
+                           sizeof(data));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* The image now refers to the new table: release the old one */
+    g_free(s->l1_table);
+    qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
+                        QCOW2_DISCARD_OTHER);
+    s->l1_table_offset = new_l1_table_offset;
+    s->l1_table = new_l1_table;
+    s->l1_size = new_l1_size;
+    return 0;
+
+ fail:
+    g_free(new_l1_table);
+    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
+                        QCOW2_DISCARD_OTHER);
+    return ret;
+}
+
+/*
+ * l2_load
+ *
+ * Obtains the L2 table located at l2_offset through the L2 table cache
+ * (qcow2_cache_get) and stores a pointer to it in *l2_table.
+ *
+ * Returns the result of qcow2_cache_get(): 0 on success, a negative value
+ * on failure.
+ */
+
+static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
+                   uint64_t **l2_table)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+                           (void**) l2_table);
+}
+
+/*
+ * Writes the 512-byte sector of the L1 table that contains entry l1_index
+ * to the image file, converting each entry to big endian on the way.
+ *
+ * A whole sector is written instead of a single entry (single entries can't
+ * be updated, and we don't want the block layer write path to perform a
+ * read-modify-write).
+ *
+ * Returns 0 on success, or the negative value returned by bdrv_pwrite_sync().
+ */
+#define L1_ENTRIES_PER_SECTOR (512 / 8)
+static int write_l1_entry(BlockDriverState *bs, int l1_index)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t sector[L1_ENTRIES_PER_SECTOR];
+    /* Index of the first entry in the sector that holds l1_index */
+    int first = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
+    int k, ret;
+
+    for (k = 0; k < L1_ENTRIES_PER_SECTOR; k++) {
+        sector[k] = cpu_to_be64(s->l1_table[first + k]);
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * first,
+                           sector, sizeof(sector));
+
+    return ret < 0 ? ret : 0;
+}
+
+/*
+ * l2_allocate
+ *
+ * Allocates a new L2 table in the image file and makes L1 entry l1_index
+ * point to it. If that L1 entry already referred to an L2 table (i.e. we
+ * are doing a copy on write for the L2 table), the contents of the old
+ * table are copied into the new one. Otherwise the new table is
+ * initialized with zeros.
+ *
+ * On success 0 is returned and *table points to the new L2 table, which is
+ * held in the L2 table cache with a reference the caller has to drop with
+ * qcow2_cache_put().
+ *
+ * On failure a negative value is returned, the in-memory L1 entry is
+ * restored, the cache reference (if one was taken) is dropped and the
+ * newly allocated cluster is freed again.
+ */
+
+static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t old_l2_offset;
+    uint64_t *l2_table = NULL;
+    int64_t l2_offset;
+    int ret;
+
+    old_l2_offset = s->l1_table[l1_index];
+
+    trace_qcow2_l2_allocate(bs, l1_index);
+
+    /* allocate a new l2 entry */
+
+    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
+    if (l2_offset < 0) {
+        /* Nothing has been set up yet, so there is nothing to undo */
+        return l2_offset;
+    }
+
+    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* allocate a new entry in the l2 cache */
+
+    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
+    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
+    if (ret < 0) {
+        /* Go through the common error path so that the cluster allocated
+         * above is not leaked */
+        goto fail;
+    }
+
+    l2_table = *table;
+
+    if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
+        /* if there was no old l2 table, clear the new table */
+        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
+    } else {
+        uint64_t* old_table;
+
+        /* if there was an old l2 table, read it from the disk */
+        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
+        ret = qcow2_cache_get(bs, s->l2_table_cache,
+                              old_l2_offset & L1E_OFFSET_MASK,
+                              (void**) &old_table);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        memcpy(l2_table, old_table, s->cluster_size);
+
+        ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    /* write the l2 table to the file */
+    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
+
+    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    ret = qcow2_cache_flush(bs, s->l2_table_cache);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* update the L1 entry */
+    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
+    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
+    ret = write_l1_entry(bs, l1_index);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    *table = l2_table;
+    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
+    return 0;
+
+fail:
+    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
+    /* Only drop the cache reference if one was actually taken */
+    if (l2_table != NULL) {
+        qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
+    }
+    s->l1_table[l1_index] = old_l2_offset;
+    /* Give back the cluster that was allocated for the new table */
+    if (l2_offset > 0) {
+        qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
+                            QCOW2_DISCARD_OTHER);
+    }
+    return ret;
+}
+
+/*
+ * Counts how many entries of the given L2 table, beginning at index start,
+ * are contiguous in the image file. Entry i counts as contiguous when its
+ * offset and stop_flags bits equal those of entry 0 advanced by
+ * i * cluster_size; the scan ends at the first entry for which this does
+ * not hold, and that entry is not counted. (This allows it, for example,
+ * to stop at the first compressed cluster, which may need different
+ * handling.)
+ *
+ * At most nb_clusters entries are examined. Returns 0 right away if entry 0
+ * has neither an offset nor any of the stop_flags set.
+ */
+static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+        uint64_t *l2_table, uint64_t start, uint64_t stop_flags)
+{
+    const uint64_t mask = stop_flags | L2E_OFFSET_MASK;
+    const uint64_t first = be64_to_cpu(l2_table[0]) & mask;
+    int i;
+
+    if (first == 0) {
+        return 0;
+    }
+
+    i = start;
+    while (i < start + nb_clusters) {
+        uint64_t expected = first + (uint64_t) i * cluster_size;
+
+        if ((be64_to_cpu(l2_table[i]) & mask) != expected) {
+            break;
+        }
+        i++;
+    }
+
+    return (i - start);
+}
+
+/*
+ * Returns the number of leading entries of l2_table (at most nb_clusters)
+ * whose cluster type is QCOW2_CLUSTER_UNALLOCATED.
+ */
+static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+{
+    int i = 0;
+
+    while (i < nb_clusters &&
+           qcow2_get_cluster_type(be64_to_cpu(l2_table[i]))
+               == QCOW2_CLUSTER_UNALLOCATED) {
+        i++;
+    }
+
+    return i;
+}
+
+/*
+ * Runs AES_cbc_encrypt() over nb_sectors consecutive 512-byte sectors, from
+ * in_buf to out_buf, with the given key and direction flag enc. Every
+ * sector gets its own IV: the little-endian sector number in the first
+ * 8 bytes and zero in the remaining 8.
+ *
+ * The crypt function is compatible with the linux cryptoloop algorithm for
+ * < 4 GB images. NOTE: out_buf == in_buf is supported. The s argument is
+ * not used.
+ */
+void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+                           uint8_t *out_buf, const uint8_t *in_buf,
+                           int nb_sectors, int enc,
+                           const AES_KEY *key)
+{
+    union {
+        uint64_t ll[2];
+        uint8_t b[16];
+    } ivec;
+    int done = 0;
+
+    while (done < nb_sectors) {
+        ivec.ll[0] = cpu_to_le64(sector_num);
+        ivec.ll[1] = 0;
+        AES_cbc_encrypt(in_buf, out_buf, 512, key,
+                        ivec.b, enc);
+
+        in_buf += 512;
+        out_buf += 512;
+        sector_num++;
+        done++;
+    }
+}
+
+/*
+ * Copies the guest sectors [start_sect + n_start, start_sect + n_end) into
+ * the image file at cluster_offset: the data is read through the driver's
+ * own bdrv_co_readv(), encrypted if s->crypt_method is set, and written to
+ * bs->file at sector (cluster_offset >> 9) + n_start.
+ *
+ * Returns 0 on success or if there is nothing to copy, and the negative
+ * value of the failing read or write otherwise.
+ */
+static int coroutine_fn copy_sectors(BlockDriverState *bs,
+                                     uint64_t start_sect,
+                                     uint64_t cluster_offset,
+                                     int n_start, int n_end)
+{
+    BDRVQcowState *s = bs->opaque;
+    QEMUIOVector qiov;
+    struct iovec iov;
+    int count, ret;
+
+    /*
+     * If this is the last cluster and it is only partially used, we must only
+     * copy until the end of the image, or bdrv_check_request will fail for the
+     * bdrv_read/write calls below.
+     */
+    if (start_sect + n_end > bs->total_sectors) {
+        n_end = bs->total_sectors - start_sect;
+    }
+
+    count = n_end - n_start;
+    if (count <= 0) {
+        return 0;
+    }
+
+    iov.iov_len = count * BDRV_SECTOR_SIZE;
+    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
+
+    /* Call .bdrv_co_readv() directly instead of using the public block-layer
+     * interface. This avoids double I/O throttling and request tracking,
+     * which can lead to deadlock when block layer copy-on-read is enabled.
+     */
+    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, count, &qiov);
+    if (ret >= 0) {
+        if (s->crypt_method) {
+            qcow2_encrypt_sectors(s, start_sect + n_start,
+                                  iov.iov_base, iov.iov_base, count, 1,
+                                  &s->aes_encrypt_key);
+        }
+
+        BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
+        ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start,
+                             count, &qiov);
+        if (ret >= 0) {
+            ret = 0;
+        }
+    }
+
+    /* The bounce buffer is released on success and on failure alike */
+    qemu_vfree(iov.iov_base);
+    return ret;
+}
+
+
+/*
+ * get_cluster_offset
+ *
+ * For a given offset of the disk image, find the cluster offset in
+ * qcow2 file. The offset is stored in *cluster_offset (0 for unallocated
+ * and zero clusters).
+ *
+ * on entry, *num is the number of contiguous sectors we'd like to
+ * access following offset.
+ *
+ * on exit, *num is the number of contiguous sectors we can read.
+ *
+ * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
+ * cases. -EIO is returned for a zero cluster in an image whose
+ * qcow_version is below 3. In the error cases *num is not updated.
+ */
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+                             int *num, uint64_t *cluster_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    unsigned int l2_index;
+    uint64_t l1_index, l2_offset, *l2_table;
+    int l1_bits, c;
+    unsigned int index_in_cluster, nb_clusters;
+    uint64_t nb_available, nb_needed;
+    int ret;
+
+    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
+    nb_needed = *num + index_in_cluster;
+
+    l1_bits = s->l2_bits + s->cluster_bits;
+
+    /* compute how many bytes there are between the offset and
+     * the end of the l1 entry
+     */
+
+    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
+
+    /* compute the number of available sectors */
+
+    nb_available = (nb_available >> 9) + index_in_cluster;
+
+    if (nb_needed > nb_available) {
+        nb_needed = nb_available;
+    }
+
+    *cluster_offset = 0;
+
+    /* seek the l2 offset in the l1 table */
+
+    l1_index = offset >> l1_bits;
+    if (l1_index >= s->l1_size) {
+        ret = QCOW2_CLUSTER_UNALLOCATED;
+        goto out;
+    }
+
+    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
+    if (!l2_offset) {
+        ret = QCOW2_CLUSTER_UNALLOCATED;
+        goto out;
+    }
+
+    /* load the l2 table in memory */
+
+    ret = l2_load(bs, l2_offset, &l2_table);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* find the cluster offset for the given disk offset */
+
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+    *cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    nb_clusters = size_to_clusters(s, nb_needed << 9);
+
+    ret = qcow2_get_cluster_type(*cluster_offset);
+    switch (ret) {
+    case QCOW2_CLUSTER_COMPRESSED:
+        /* Compressed clusters can only be processed one by one */
+        c = 1;
+        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
+        break;
+    case QCOW2_CLUSTER_ZERO:
+        if (s->qcow_version < 3) {
+            /* Drop the reference taken by l2_load() before bailing out,
+             * otherwise the L2 cache entry stays pinned forever */
+            qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+            return -EIO;
+        }
+        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+                &l2_table[l2_index], 0,
+                QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+        *cluster_offset = 0;
+        break;
+    case QCOW2_CLUSTER_UNALLOCATED:
+        /* how many empty clusters ? */
+        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+        *cluster_offset = 0;
+        break;
+    case QCOW2_CLUSTER_NORMAL:
+        /* how many allocated clusters ? */
+        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+                &l2_table[l2_index], 0,
+                QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
+        *cluster_offset &= L2E_OFFSET_MASK;
+        break;
+    default:
+        abort();
+    }
+
+    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+
+    nb_available = (c * s->cluster_sectors);
+
+out:
+    /* nb_available and nb_needed are both counted from the start of the
+     * first cluster, hence the subtraction of index_in_cluster below */
+    if (nb_available > nb_needed) {
+        nb_available = nb_needed;
+    }
+
+    *num = nb_available - index_in_cluster;
+
+    return ret;
+}
+
+/*
+ * get_cluster_table
+ *
+ * for a given disk offset, load (and allocate if needed)
+ * the l2 table. The L1 table is grown first if the offset lies beyond it.
+ *
+ * A pointer to the l2 table and the index of the cluster within that table
+ * are given to the caller through *new_l2_table and *new_l2_index.
+ *
+ * Returns 0 on success, -errno in failure case
+ */
+static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
+                             uint64_t **new_l2_table,
+                             int *new_l2_index)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t l1_index = offset >> (s->l2_bits + s->cluster_bits);
+    uint64_t l1_entry, old_l2_offset;
+    uint64_t *l2_table = NULL;
+    unsigned int l2_index;
+    int ret;
+
+    /* make sure the l1 table has an entry for this offset */
+    if (l1_index >= s->l1_size) {
+        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    assert(l1_index < s->l1_size);
+    l1_entry = s->l1_table[l1_index];
+    old_l2_offset = l1_entry & L1E_OFFSET_MASK;
+
+    if (l1_entry & QCOW_OFLAG_COPIED) {
+        /* load the l2 table in memory */
+        ret = l2_load(bs, old_l2_offset, &l2_table);
+        if (ret < 0) {
+            return ret;
+        }
+    } else {
+        /* First allocate a new L2 table (and do COW if needed) */
+        ret = l2_allocate(bs, l1_index, &l2_table);
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* Then decrease the refcount of the old table */
+        if (old_l2_offset) {
+            qcow2_free_clusters(bs, old_l2_offset,
+                                s->l2_size * sizeof(uint64_t),
+                                QCOW2_DISCARD_OTHER);
+        }
+    }
+
+    /* find the cluster index for the given disk offset */
+    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
+
+    *new_l2_table = l2_table;
+    *new_l2_index = l2_index;
+
+    return 0;
+}
+
+/*
+ * alloc_compressed_cluster_offset
+ *
+ * Allocates compressed_size bytes in the image file for the cluster at the
+ * given disk offset and records them in its L2 entry. The entry stores the
+ * host offset together with QCOW_OFLAG_COMPRESSED and a sector count
+ * shifted by s->csize_shift.
+ *
+ * Compression can't overwrite anything, so the call fails if the L2 entry
+ * already contains an offset.
+ *
+ * Return the new L2 entry value (offset plus flags) if successful,
+ * Return 0, otherwise.
+ *
+ */
+
+uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+                                               uint64_t offset,
+                                               int compressed_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l2_table;
+    int64_t cluster_offset;
+    int l2_index, nb_csectors;
+
+    if (get_cluster_table(bs, offset, &l2_table, &l2_index) < 0) {
+        return 0;
+    }
+
+    /* Fail if the cluster was already allocated */
+    cluster_offset = be64_to_cpu(l2_table[l2_index]);
+    if (cluster_offset & L2E_OFFSET_MASK) {
+        goto release;
+    }
+
+    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
+    if (cluster_offset < 0) {
+        goto release;
+    }
+
+    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
+                  (cluster_offset >> 9);
+
+    cluster_offset |= QCOW_OFLAG_COMPRESSED |
+                      ((uint64_t)nb_csectors << s->csize_shift);
+
+    /* update L2 table; compressed clusters never have the copied flag */
+    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    l2_table[l2_index] = cpu_to_be64(cluster_offset);
+    if (qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table) < 0) {
+        return 0;
+    }
+
+    return cluster_offset;
+
+release:
+    /* Nothing was allocated: just drop the L2 table reference */
+    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    return 0;
+}
+
+/*
+ * Performs the copy on write for region r of allocation m by calling
+ * copy_sectors() with s->lock released for the duration of the copy.
+ *
+ * Returns 0 on success or if the region is empty, and the negative value
+ * from copy_sectors() on failure.
+ */
+static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    if (r->nb_sectors == 0) {
+        return 0;
+    }
+
+    qemu_co_mutex_unlock(&s->lock);
+    ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
+                       r->offset / BDRV_SECTOR_SIZE,
+                       r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+    qemu_co_mutex_lock(&s->lock);
+
+    if (ret >= 0) {
+        /*
+         * Before we update the L2 table to actually point to the new
+         * cluster, we need to be sure that the refcounts have been
+         * increased and COW was handled.
+         */
+        qcow2_cache_depends_on_flush(s->l2_table_cache);
+        ret = 0;
+    }
+
+    return ret;
+}
+
+/*
+ * Completes the allocation described by m: performs the copy on write for
+ * the regions m->cow_start and m->cow_end, then makes the m->nb_clusters
+ * L2 entries starting at guest offset m->offset point to the clusters
+ * starting at m->alloc_offset (with QCOW_OFLAG_COPIED set), and finally
+ * frees the clusters that the overwritten L2 entries referred to.
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
+{
+    BDRVQcowState *s = bs->opaque;
+    int i, j = 0, l2_index, ret;
+    uint64_t *old_cluster, *l2_table;
+    uint64_t cluster_offset = m->alloc_offset;
+
+    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
+    assert(m->nb_clusters > 0);
+
+    /* Collects the (big endian) L2 entries that get overwritten below */
+    old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t));
+
+    /* copy content of unmodified sectors */
+    ret = perform_cow(bs, m, &m->cow_start);
+    if (ret < 0) {
+        goto err;
+    }
+
+    ret = perform_cow(bs, m, &m->cow_end);
+    if (ret < 0) {
+        goto err;
+    }
+
+    /* Update L2 table. */
+    if (s->use_lazy_refcounts) {
+        qcow2_mark_dirty(bs);
+    }
+    if (qcow2_need_accurate_refcounts(s)) {
+        qcow2_cache_set_dependency(bs, s->l2_table_cache,
+                                   s->refcount_block_cache);
+    }
+
+    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
+    if (ret < 0) {
+        goto err;
+    }
+    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+
+    for (i = 0; i < m->nb_clusters; i++) {
+        /* if two concurrent writes happen to the same unallocated cluster
+         * each write allocates separate cluster and writes data concurrently.
+         * The first one to complete updates l2 table with pointer to its
+         * cluster the second one has to do RMW (which is done above by
+         * copy_sectors()), update l2 table with its cluster pointer and free
+         * old cluster. This is what this loop does */
+        if (l2_table[l2_index + i] != 0) {
+            old_cluster[j++] = l2_table[l2_index + i];
+        }
+
+        /* Widen i before shifting: an int shift could overflow for large
+         * cluster sizes before the result is added to the 64-bit offset */
+        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
+                    ((uint64_t) i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
+    }
+
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        goto err;
+    }
+
+    /*
+     * If this was a COW, we need to decrease the refcount of the old cluster.
+     *
+     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
+     * clusters), the next write will reuse them anyway.
+     */
+    for (i = 0; i < j; i++) {
+        qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
+                                QCOW2_DISCARD_NEVER);
+    }
+
+    ret = 0;
+err:
+    g_free(old_cluster);
+    return ret;
+}
+
+/*
+ * Returns the number of contiguous clusters that can be used for an allocating
+ * write, but require COW to be performed (this includes yet unallocated space,
+ * which must copy from the backing file).
+ *
+ * The scan covers at most nb_clusters entries of l2_table starting at
+ * l2_index and stops at the first normal cluster that has QCOW_OFLAG_COPIED
+ * set. An unknown cluster type aborts the program.
+ */
+static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+                              uint64_t *l2_table, int l2_index)
+{
+    int i;
+
+    for (i = 0; i < nb_clusters; i++) {
+        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + i]);
+        int cluster_type = qcow2_get_cluster_type(l2_entry);
+
+        if (cluster_type == QCOW2_CLUSTER_NORMAL) {
+            if (l2_entry & QCOW_OFLAG_COPIED) {
+                /* can be written in place, so the COW run ends here */
+                break;
+            }
+        } else if (cluster_type != QCOW2_CLUSTER_UNALLOCATED &&
+                   cluster_type != QCOW2_CLUSTER_COMPRESSED &&
+                   cluster_type != QCOW2_CLUSTER_ZERO) {
+            abort();
+        }
+    }
+
+    assert(i <= nb_clusters);
+    return i;
+}
+
+/*
+ * Check if there already is an AIO write request in flight which allocates
+ * the same cluster. In this case we need to wait until the previous
+ * request has completed and updated the L2 table accordingly.
+ *
+ * The byte range [guest_offset, guest_offset + *cur_bytes) of the new
+ * request is compared against the range l2meta_cow_start() ..
+ * l2meta_cow_end() of every entry in s->cluster_allocs. *m is the l2meta
+ * that the caller has gathered so far (NULL if there is none).
+ *
+ * Returns:
+ * 0 if there was no dependency. *cur_bytes indicates the number of
+ * bytes from guest_offset that can be read before the next
+ * dependency must be processed (or the request is complete).
+ * 0 is also returned, with *cur_bytes set to 0, if the request starts
+ * inside a running allocation while *m is already set.
+ *
+ * -EAGAIN if we had to wait for another request, previously gathered
+ * information on cluster allocation may be invalid now. The caller
+ * must start over anyway, so consider *cur_bytes undefined.
+ */
+static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *cur_bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowL2Meta *old_alloc;
+ uint64_t bytes = *cur_bytes;
+
+ /* Walk all allocations that are currently in flight */
+ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
+
+ /* end is computed from the possibly already shortened byte count */
+ uint64_t start = guest_offset;
+ uint64_t end = start + bytes;
+ uint64_t old_start = l2meta_cow_start(old_alloc);
+ uint64_t old_end = l2meta_cow_end(old_alloc);
+
+ if (end <= old_start || start >= old_end) {
+ /* No intersection */
+ } else {
+ if (start < old_start) {
+ /* Stop at the start of a running allocation */
+ bytes = old_start - start;
+ } else {
+ /* The request starts inside the running allocation */
+ bytes = 0;
+ }
+
+ /* Stop if already an l2meta exists. After yielding, it wouldn't
+ * be valid any more, so we'd have to clean up the old L2Metas
+ * and deal with requests depending on them before starting to
+ * gather new ones. Not worth the trouble. */
+ if (bytes == 0 && *m) {
+ *cur_bytes = 0;
+ return 0;
+ }
+
+ if (bytes == 0) {
+ /* Wait for the dependency to complete. We need to recheck
+ * the free/allocated clusters when we continue. */
+ /* s->lock is released while waiting and re-taken afterwards */
+ qemu_co_mutex_unlock(&s->lock);
+ qemu_co_queue_wait(&old_alloc->dependent_requests);
+ qemu_co_mutex_lock(&s->lock);
+ return -EAGAIN;
+ }
+ }
+ }
+
+ /* Make sure that existing clusters and new allocations are only used up to
+ * the next dependency if we shortened the request above */
+ *cur_bytes = bytes;
+
+ return 0;
+}
+
+/*
+ * Checks how many already allocated clusters that don't require a copy on
+ * write there are at the given guest_offset (up to *bytes). If
+ * *host_offset is not zero, only physically contiguous clusters beginning at
+ * this host offset are counted.
+ *
+ * Note that guest_offset may not be cluster aligned. In this case, the
+ * returned *host_offset points to exact byte referenced by guest_offset and
+ * therefore isn't cluster aligned as well.
+ *
+ * The m argument is not used by this function.
+ *
+ * Returns:
+ * 0: if no allocated clusters are available at the given offset.
+ * *bytes is normally unchanged. It is set to 0 if the cluster
+ * is allocated and doesn't need COW, but doesn't have the right
+ * physical offset.
+ *
+ * 1: if allocated clusters that don't require a COW are available at
+ * the requested offset. *bytes may have decreased and describes
+ * the length of the area that can be written to.
+ *
+ * -errno: in error cases
+ */
+static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index;
+ uint64_t cluster_offset;
+ uint64_t *l2_table;
+ unsigned int nb_clusters;
+ unsigned int keep_clusters;
+ int ret, pret;
+
+ trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
+ *bytes);
+
+ /* A requested host offset must have the same in-cluster offset as the
+ * guest offset */
+ assert(*host_offset == 0 || offset_into_cluster(s, guest_offset)
+ == offset_into_cluster(s, *host_offset));
+
+ /*
+ * Calculate the number of clusters to look for. We stop at L2 table
+ * boundaries to keep things simple.
+ */
+ nb_clusters =
+ size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
+
+ l2_index = offset_to_l2_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ /* Find L2 entry for the first involved cluster */
+ ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Despite its name this holds the whole L2 entry, flags included */
+ cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+ /* Check how many clusters are already allocated and don't need COW */
+ if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
+ && (cluster_offset & QCOW_OFLAG_COPIED))
+ {
+ /* If a specific host_offset is required, check it */
+ bool offset_matches =
+ (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
+
+ if (*host_offset != 0 && !offset_matches) {
+ *bytes = 0;
+ ret = 0;
+ goto out;
+ }
+
+ /* We keep all QCOW_OFLAG_COPIED clusters */
+ keep_clusters =
+ count_contiguous_clusters(nb_clusters, s->cluster_size,
+ &l2_table[l2_index], 0,
+ QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
+ assert(keep_clusters <= nb_clusters);
+
+ /* The usable length starts at guest_offset, not at the cluster start */
+ *bytes = MIN(*bytes,
+ keep_clusters * s->cluster_size
+ - offset_into_cluster(s, guest_offset));
+
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+
+ /* Cleanup */
+out:
+ /* Drop the L2 table reference; a failure here overrides ret */
+ pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (pret < 0) {
+ return pret;
+ }
+
+ /* Only return a host offset if we actually made progress. Otherwise we
+ * would make requirements for handle_alloc() that it can't fulfill */
+ if (ret) {
+ *host_offset = (cluster_offset & L2E_OFFSET_MASK)
+ + offset_into_cluster(s, guest_offset);
+ }
+
+ return ret;
+}
+
+/*
+ * Allocates new clusters for the given guest_offset.
+ *
+ * If *host_offset is zero, *nb_clusters clusters are requested from
+ * qcow2_alloc_clusters() and *host_offset is set to the offset it returns;
+ * *nb_clusters is left untouched.
+ *
+ * If *host_offset is non-zero, the clusters are requested at exactly that
+ * offset through qcow2_alloc_clusters_at(), and *nb_clusters is updated to
+ * the number that call reports (which can be 0).
+ *
+ * guest_offset is only used for tracing.
+ *
+ * Returns 0 on success and the negative value of the failing allocation
+ * function otherwise.
+ */
+static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
+    uint64_t *host_offset, unsigned int *nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t cluster_offset;
+    int ret;
+
+    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
+                                         *host_offset, *nb_clusters);
+
+    /* Allocate new clusters */
+    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
+
+    if (*host_offset != 0) {
+        /* The caller insists on a specific position in the image file */
+        ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+        if (ret < 0) {
+            return ret;
+        }
+        *nb_clusters = ret;
+        return 0;
+    }
+
+    /* Any position will do */
+    cluster_offset = qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
+    if (cluster_offset < 0) {
+        return cluster_offset;
+    }
+    *host_offset = cluster_offset;
+    return 0;
+}
+
+/*
+ * Allocates new clusters for an area that either is yet unallocated or needs a
+ * copy on write. If *host_offset is non-zero, clusters are only allocated if
+ * the new allocation can match the specified host offset.
+ *
+ * Note that guest_offset may not be cluster aligned. In this case, the
+ * returned *host_offset points to exact byte referenced by guest_offset and
+ * therefore isn't cluster aligned as well.
+ *
+ * When clusters are allocated, a new QCowL2Meta describing the allocation is
+ * created, chained in front of the previous *m, stored in *m and inserted
+ * into s->cluster_allocs.
+ *
+ * Returns:
+ * 0: if no clusters could be allocated. *bytes is set to 0,
+ * *host_offset is left unchanged.
+ *
+ * 1: if new clusters were allocated. *bytes may be decreased if the
+ * new allocation doesn't cover all of the requested area.
+ * *host_offset is updated to contain the host offset of the first
+ * newly allocated cluster.
+ *
+ * -errno: in error cases
+ */
+static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
+ uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ int l2_index;
+ uint64_t *l2_table;
+ uint64_t entry;
+ unsigned int nb_clusters;
+ int ret;
+
+ uint64_t alloc_cluster_offset;
+
+ trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
+ *bytes);
+ assert(*bytes > 0);
+
+ /*
+ * Calculate the number of clusters to look for. We stop at L2 table
+ * boundaries to keep things simple.
+ */
+ nb_clusters =
+ size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
+
+ l2_index = offset_to_l2_index(s, guest_offset);
+ nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+ /* Find L2 entry for the first involved cluster */
+ ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
+ if (ret < 0) {
+ return ret;
+ }
+
+ entry = be64_to_cpu(l2_table[l2_index]);
+
+ /* For the moment, overwrite compressed clusters one by one */
+ if (entry & QCOW_OFLAG_COMPRESSED) {
+ nb_clusters = 1;
+ } else {
+ nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
+ }
+
+ /* This function is only called when there were no non-COW clusters, so if
+ * we can't find any unallocated or COW clusters either, something is
+ * wrong with our code. */
+ assert(nb_clusters > 0);
+
+ /* The L2 table is not needed any more from here on */
+ ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Allocate, if necessary at a given offset in the image file */
+ alloc_cluster_offset = start_of_cluster(s, *host_offset);
+ ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
+ &nb_clusters);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Can't extend contiguous allocation */
+ if (nb_clusters == 0) {
+ *bytes = 0;
+ return 0;
+ }
+
+ /*
+ * Save info needed for meta data update.
+ *
+ * requested_sectors: Number of sectors from the start of the first
+ * newly allocated cluster to the end of the (possibly shortened
+ * before) write request.
+ *
+ * avail_sectors: Number of sectors from the start of the first
+ * newly allocated to the end of the last newly allocated cluster.
+ *
+ * alloc_n_start: Number of sectors between the start of the first
+ * newly allocated cluster and guest_offset; used as the length of
+ * the COW region at the start.
+ *
+ * nb_sectors: The number of sectors from the start of the first
+ * newly allocated cluster to the end of the area that the write
+ * request actually writes to (excluding COW at the end)
+ */
+ int requested_sectors =
+ (*bytes + offset_into_cluster(s, guest_offset))
+ >> BDRV_SECTOR_BITS;
+ int avail_sectors = nb_clusters
+ << (s->cluster_bits - BDRV_SECTOR_BITS);
+ int alloc_n_start = offset_into_cluster(s, guest_offset)
+ >> BDRV_SECTOR_BITS;
+ int nb_sectors = MIN(requested_sectors, avail_sectors);
+ QCowL2Meta *old_m = *m;
+
+ *m = g_malloc0(sizeof(**m));
+
+ /* cow_start covers the sectors before the written area, cow_end the
+ * sectors between its end and the end of the last allocated cluster */
+ **m = (QCowL2Meta) {
+ .next = old_m,
+
+ .alloc_offset = alloc_cluster_offset,
+ .offset = start_of_cluster(s, guest_offset),
+ .nb_clusters = nb_clusters,
+ .nb_available = nb_sectors,
+
+ .cow_start = {
+ .offset = 0,
+ .nb_sectors = alloc_n_start,
+ },
+ .cow_end = {
+ .offset = nb_sectors * BDRV_SECTOR_SIZE,
+ .nb_sectors = avail_sectors - nb_sectors,
+ },
+ };
+ qemu_co_queue_init(&(*m)->dependent_requests);
+ QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
+
+ *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
+ *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
+ - offset_into_cluster(s, guest_offset));
+ assert(*bytes != 0);
+
+ return 1;
+
+fail:
+ /* NOTE(review): this label is only reached before a new l2meta has been
+ * created, so *m here is whatever the caller passed in - confirm that
+ * removing it from the in-flight list is what the caller expects. */
+ if (*m && (*m)->nb_clusters > 0) {
+ QLIST_REMOVE(*m, next_in_flight);
+ }
+ return ret;
+}
+
+/*
+ * alloc_cluster_offset
+ *
+ * For a given offset on the virtual disk, find the cluster offset in qcow2
+ * file. If the offset is not found, allocate a new cluster.
+ *
+ * If the cluster was already allocated, m->nb_clusters is set to 0 and
+ * other fields in m are meaningless.
+ *
+ * If the cluster is newly allocated, m->nb_clusters is set to the number of
+ * contiguous clusters that have been allocated. In this case, the other
+ * fields of m are valid and contain information about the first allocated
+ * cluster.
+ *
+ * If the request conflicts with another write request in flight, the coroutine
+ * is queued and will be reentered when the dependency has completed.
+ *
+ * Parameters, as used by the code below:
+ *   offset       guest offset; only its cluster-aligned part is kept
+ *   n_start      first sector of the request, counted from the start of that
+ *                cluster (asserted to match offset_into_cluster(s, offset))
+ *   n_end        end sector of the request, counted the same way
+ *   num          out: number of sectors of the request that were covered
+ *   host_offset  out: cluster-aligned form of the first non-zero
+ *                cluster_offset produced by the loop (asserted non-zero)
+ *   m            out: reset to NULL on every (re)start, then filled in by the
+ *                handle_*() helpers
+ *
+ * Return 0 on success and -errno in error cases
+ */
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t start, remaining;
+ uint64_t cluster_offset;
+ uint64_t cur_bytes;
+ int ret;
+
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
+ n_start, n_end);
+
+ assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset));
+ offset = start_of_cluster(s, offset);
+
+again:
+ /* All per-attempt state is (re)initialised here, so a restart through
+ * "goto again" begins the search from scratch. */
+ start = offset + (n_start << BDRV_SECTOR_BITS);
+ remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
+ cluster_offset = 0;
+ *host_offset = 0;
+ cur_bytes = 0;
+ *m = NULL;
+
+ while (true) {
+
+ /* Latch the host offset only once; it stays 0 until a previous
+ * iteration has left a non-zero cluster_offset behind. */
+ if (!*host_offset) {
+ *host_offset = start_of_cluster(s, cluster_offset);
+ }
+
+ assert(remaining >= cur_bytes);
+
+ /* Account for the bytes handled by the previous iteration
+ * (cur_bytes is 0 on the first pass). */
+ start += cur_bytes;
+ remaining -= cur_bytes;
+ cluster_offset += cur_bytes;
+
+ if (remaining == 0) {
+ break;
+ }
+
+ cur_bytes = remaining;
+
+ /*
+ * Now start gathering as many contiguous clusters as possible:
+ *
+ * 1. Check for overlaps with in-flight allocations
+ *
+ * a) Overlap not in the first cluster -> shorten this request and
+ * let the caller handle the rest in its next loop iteration.
+ *
+ * b) Real overlaps of two requests. Yield and restart the search
+ * for contiguous clusters (the situation could have changed
+ * while we were sleeping)
+ *
+ * c) TODO: Request starts in the same cluster as the in-flight
+ * allocation ends. Shorten the COW of the in-flight allocation,
+ * set cluster_offset to write to the same cluster and set up
+ * the right synchronisation between the in-flight request and
+ * the new one.
+ */
+ ret = handle_dependencies(bs, start, &cur_bytes, m);
+ if (ret == -EAGAIN) {
+ /* Currently handle_dependencies() doesn't yield if we already had
+ * an allocation. If it did, we would have to clean up the L2Meta
+ * structs before starting over. */
+ assert(*m == NULL);
+ goto again;
+ } else if (ret < 0) {
+ return ret;
+ } else if (cur_bytes == 0) {
+ break;
+ } else {
+ /* handle_dependencies() may have decreased cur_bytes (shortened
+ * the allocations below) so that the next dependency is processed
+ * correctly during the next loop iteration. */
+ }
+
+ /*
+ * 2. Count contiguous COPIED clusters.
+ */
+ ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ /* Positive return: go round again so the bookkeeping at the top
+ * of the loop consumes cur_bytes. */
+ continue;
+ } else if (cur_bytes == 0) {
+ break;
+ }
+
+ /*
+ * 3. If the request still hasn't completed, allocate new clusters,
+ * considering any cluster_offset of steps 1c or 2.
+ */
+ ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ continue;
+ } else {
+ assert(cur_bytes == 0);
+ break;
+ }
+ }
+
+ /* Sectors covered = sectors requested minus what is still remaining */
+ *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
+ assert(*num > 0);
+ assert(*host_offset != 0);
+
+ return 0;
+}
+
+/*
+ * Inflate buf_size bytes from buf into out_buf.
+ *
+ * The stream is opened with windowBits -12. Decompression only counts as
+ * successful when inflate() ends with Z_STREAM_END or Z_BUF_ERROR and exactly
+ * out_buf_size bytes were produced.
+ *
+ * Returns 0 on success and -1 on any failure.
+ */
+static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
+                             const uint8_t *buf, int buf_size)
+{
+    z_stream zs;
+    int status, produced, result;
+
+    memset(&zs, 0, sizeof(zs));
+    zs.next_in = (uint8_t *)buf;
+    zs.avail_in = buf_size;
+    zs.next_out = out_buf;
+    zs.avail_out = out_buf_size;
+
+    if (inflateInit2(&zs, -12) != Z_OK) {
+        return -1;
+    }
+
+    status = inflate(&zs, Z_FINISH);
+    produced = zs.next_out - out_buf;
+
+    if ((status == Z_STREAM_END || status == Z_BUF_ERROR) &&
+        produced == out_buf_size) {
+        result = 0;
+    } else {
+        result = -1;
+    }
+
+    /* The stream is torn down on both the success and the failure path */
+    inflateEnd(&zs);
+    return result;
+}
+
+/*
+ * Make sure s->cluster_cache holds the decompressed contents of the
+ * compressed cluster described by cluster_offset.
+ *
+ * Nothing is read if s->cluster_cache_offset already matches the masked
+ * offset. Otherwise the compressed sectors are read into s->cluster_data and
+ * inflated into s->cluster_cache.
+ *
+ * Returns 0 on success, the bdrv_read() error on read failure, or -EIO if
+ * decompression fails.
+ */
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t coffset = cluster_offset & s->cluster_offset_mask;
+    int nb_csectors, sector_offset, csize, ret;
+
+    if (s->cluster_cache_offset == coffset) {
+        /* Cache hit: the decompressed data is already in place */
+        return 0;
+    }
+
+    nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
+    sector_offset = coffset & 511;
+    csize = nb_csectors * 512 - sector_offset;
+
+    BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+    ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (decompress_buffer(s->cluster_cache, s->cluster_size,
+                          s->cluster_data + sector_offset, csize) < 0) {
+        return -EIO;
+    }
+
+    /* Remember which cluster the cache now describes */
+    s->cluster_cache_offset = coffset;
+    return 0;
+}
+
+/*
+ * Discards clusters starting at offset, but never beyond the end of the L2
+ * table that covers offset. Entries whose offset bits are all zero are
+ * skipped; every other entry is cleared and its cluster is released.
+ *
+ * Returns the number of L2 entries that were walked (at most nb_clusters),
+ * or -errno on failure.
+ */
+static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
+    unsigned int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l2_table;
+    int l2_index;
+    int ret;
+    int n;
+
+    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Stay within the L2 table that was just loaded */
+    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+    for (n = 0; n < nb_clusters; n++) {
+        uint64_t *entry = &l2_table[l2_index + n];
+        uint64_t old_offset = be64_to_cpu(*entry);
+
+        if ((old_offset & L2E_OFFSET_MASK) != 0) {
+            /* Drop the mapping first ... */
+            qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+            *entry = cpu_to_be64(0);
+
+            /* ... and only then give up the reference to the cluster */
+            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+        }
+    }
+
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return nb_clusters;
+}
+
+/*
+ * Discards every cluster that lies completely inside the guest range
+ * [offset, offset + nb_sectors sectors). The start is rounded up and the end
+ * is rounded down to cluster boundaries, so partial clusters at either edge
+ * are left untouched.
+ *
+ * While the L2 tables are processed s->cache_discards is set; the queued
+ * discards are handed to qcow2_process_discards() at the end, on success as
+ * well as on failure.
+ *
+ * Returns 0 on success or -errno on failure.
+ */
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+    int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t end_offset;
+    unsigned int nb_clusters;
+    int ret;
+
+    /* Widen before shifting: the byte count of a large sector count does
+     * not fit into an int. */
+    end_offset = offset + ((uint64_t)nb_sectors << BDRV_SECTOR_BITS);
+
+    /* Round start up and end down */
+    offset = align_offset(offset, s->cluster_size);
+    end_offset &= ~(s->cluster_size - 1);
+
+    if (offset > end_offset) {
+        return 0;
+    }
+
+    nb_clusters = size_to_clusters(s, end_offset - offset);
+
+    s->cache_discards = true;
+
+    /* Each L2 table is handled by its own loop iteration */
+    while (nb_clusters > 0) {
+        ret = discard_single_l2(bs, offset, nb_clusters);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        nb_clusters -= ret;
+        /* 64-bit multiply: one L2 table can cover more bytes than an int
+         * can express when clusters are large. */
+        offset += (uint64_t)ret * s->cluster_size;
+    }
+
+    ret = 0;
+fail:
+    s->cache_discards = false;
+    qcow2_process_discards(bs, ret);
+
+    return ret;
+}
+
+/*
+ * Marks clusters starting at offset as zero clusters, but never beyond the
+ * end of the L2 table that covers offset. A compressed entry is replaced by
+ * a bare zero-flag entry and its cluster is released; any other entry simply
+ * gets the zero flag ORed in.
+ *
+ * Returns the number of L2 entries that were updated (at most nb_clusters),
+ * or -errno on failure.
+ */
+static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
+    unsigned int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l2_table;
+    int l2_index;
+    int ret;
+    int n;
+
+    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Stay within the L2 table that was just loaded */
+    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+
+    for (n = 0; n < nb_clusters; n++) {
+        uint64_t *entry = &l2_table[l2_index + n];
+        uint64_t old_offset = be64_to_cpu(*entry);
+
+        /* Every entry in range is modified, so the table is always dirty */
+        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+
+        if (!(old_offset & QCOW_OFLAG_COMPRESSED)) {
+            *entry |= cpu_to_be64(QCOW_OFLAG_ZERO);
+            continue;
+        }
+
+        /* Compressed cluster: replace the entry and release its data */
+        *entry = cpu_to_be64(QCOW_OFLAG_ZERO);
+        qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
+    }
+
+    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return nb_clusters;
+}
+
+/*
+ * Marks the clusters covering nb_sectors sectors, starting at guest offset
+ * offset, as reading back zeroes.
+ *
+ * While the L2 tables are processed s->cache_discards is set; the queued
+ * discards are handed to qcow2_process_discards() at the end, on success as
+ * well as on failure.
+ *
+ * Returns 0 on success, -ENOTSUP for images older than version 3, or
+ * another -errno on failure.
+ */
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    unsigned int nb_clusters;
+    int ret;
+
+    /* The zero flag is only supported by version 3 and newer */
+    if (s->qcow_version < 3) {
+        return -ENOTSUP;
+    }
+
+    /* Widen before shifting: the byte count of a large sector count does
+     * not fit into an int. */
+    nb_clusters = size_to_clusters(s, (int64_t)nb_sectors << BDRV_SECTOR_BITS);
+
+    s->cache_discards = true;
+
+    /* Each L2 table is handled by its own loop iteration */
+    while (nb_clusters > 0) {
+        ret = zero_single_l2(bs, offset, nb_clusters);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        nb_clusters -= ret;
+        /* 64-bit multiply: one L2 table can cover more bytes than an int
+         * can express when clusters are large. */
+        offset += (uint64_t)ret * s->cluster_size;
+    }
+
+    ret = 0;
+fail:
+    s->cache_discards = false;
+    qcow2_process_discards(bs, ret);
+
+    return ret;
+}
diff --git a/contrib/qemu/block/qcow2-refcount.c b/contrib/qemu/block/qcow2-refcount.c
new file mode 100644
index 000000000..1244693f3
--- /dev/null
+++ b/contrib/qemu/block/qcow2-refcount.c
@@ -0,0 +1,1374 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+/* Forward declarations: both helpers are defined further down in this file
+ * but are already called by alloc_refcount_block(). */
+static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length,
+ int addend, enum qcow2_discard_type type);
+
+
+/*********************************************************/
+/* refcount handling */
+
+/*
+ * Allocates s->refcount_table and, if the table is not empty, loads it from
+ * s->refcount_table_offset in bs->file and converts every entry from
+ * big-endian to host byte order.
+ *
+ * Returns 0 on success. On a failed read the error reported by bdrv_pread()
+ * is passed through; a short read is reported as -EIO.
+ *
+ * NOTE(review): the table is not freed here on failure, exactly as before;
+ * presumably the caller releases it via qcow2_refcount_close() - confirm.
+ */
+int qcow2_refcount_init(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret, refcount_table_size2, i;
+
+    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
+    s->refcount_table = g_malloc(refcount_table_size2);
+    if (s->refcount_table_size > 0) {
+        BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
+        ret = bdrv_pread(bs->file, s->refcount_table_offset,
+                         s->refcount_table, refcount_table_size2);
+        if (ret < 0) {
+            /* Report the real I/O error instead of a misleading -ENOMEM */
+            return ret;
+        }
+        if (ret != refcount_table_size2) {
+            return -EIO;
+        }
+        for (i = 0; i < s->refcount_table_size; i++) {
+            be64_to_cpus(&s->refcount_table[i]);
+        }
+    }
+    return 0;
+}
+
+/* Releases the in-memory refcount table attached to bs. */
+void qcow2_refcount_close(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+
+    g_free(state->refcount_table);
+}
+
+
+/*
+ * Fetches the refcount block stored at refcount_block_offset through the
+ * refcount block cache and hands it back in *refcount_block.
+ *
+ * Returns whatever qcow2_cache_get() returns.
+ */
+static int load_refcount_block(BlockDriverState *bs,
+                               int64_t refcount_block_offset,
+                               void **refcount_block)
+{
+    BDRVQcowState *s = bs->opaque;
+
+    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
+    return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+                           refcount_block);
+}
+
+/*
+ * Looks up the reference count of the cluster with the given index.
+ *
+ * A cluster that lies beyond the refcount table, or whose refcount block has
+ * not been allocated, is reported with a refcount of 0.
+ *
+ * Returns the refcount (non-negative) or -errno if the refcount block could
+ * not be fetched from or returned to the cache.
+ */
+static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
+{
+    BDRVQcowState *s = bs->opaque;
+    const int index_bits = s->cluster_bits - REFCOUNT_SHIFT;
+    int table_idx, entry_idx;
+    int64_t block_offset;
+    int rc;
+    uint16_t *block;
+    uint16_t count;
+
+    table_idx = cluster_index >> index_bits;
+    if (table_idx >= s->refcount_table_size) {
+        return 0;
+    }
+
+    block_offset = s->refcount_table[table_idx];
+    if (!block_offset) {
+        return 0;
+    }
+
+    rc = qcow2_cache_get(bs, s->refcount_block_cache, block_offset,
+                         (void**) &block);
+    if (rc < 0) {
+        return rc;
+    }
+
+    /* Position of this cluster's entry inside its refcount block */
+    entry_idx = cluster_index & ((1 << index_bits) - 1);
+    count = be16_to_cpu(block[entry_idx]);
+
+    rc = qcow2_cache_put(bs, s->refcount_block_cache, (void**) &block);
+    if (rc < 0) {
+        return rc;
+    }
+
+    return count;
+}
+
+/*
+ * Picks the size (in entries) for a refcount table that must hold at least
+ * min_size entries. Starting from the current table size in clusters (at
+ * least one), the cluster count is grown by roughly a factor of 1.5 until it
+ * is large enough, so the table need not grow for every new refcount block.
+ */
+static unsigned int next_refcount_table_size(BDRVQcowState *s,
+    unsigned int min_size)
+{
+    /* log2 of the number of 8-byte table entries held by one cluster */
+    const int entries_shift = s->cluster_bits - 3;
+    unsigned int needed_clusters = (min_size >> entries_shift) + 1;
+    unsigned int clusters;
+
+    for (clusters = MAX(1, s->refcount_table_size >> entries_shift);
+         needed_clusters > clusters;
+         clusters = (clusters * 3 + 1) / 2) {
+        /* keep growing */
+    }
+
+    return clusters << entries_shift;
+}
+
+
+/*
+ * Tells whether the clusters at the two byte offsets have their refcounts
+ * stored in the same refcount block. Returns non-zero if so.
+ */
+static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
+    uint64_t offset_b)
+{
+    /* Shifting a byte offset by this amount yields its refcount block index */
+    const int block_shift = 2 * s->cluster_bits - REFCOUNT_SHIFT;
+
+    return (offset_a >> block_shift) == (offset_b >> block_shift);
+}
+
+/*
+ * Loads a refcount block. If it doesn't exist yet, it is allocated first
+ * (including growing the refcount table if needed).
+ *
+ * cluster_index identifies the cluster whose refcount block is wanted; on
+ * success *refcount_block refers to that block's cache entry, which the
+ * caller has to give back with qcow2_cache_put().
+ *
+ * Returns 0 on success or -errno in error case
+ */
+static int alloc_refcount_block(BlockDriverState *bs,
+ int64_t cluster_index, uint16_t **refcount_block)
+{
+ BDRVQcowState *s = bs->opaque;
+ unsigned int refcount_table_index;
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
+
+ /* Find the refcount block for the given cluster */
+ refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+
+ if (refcount_table_index < s->refcount_table_size) {
+
+ uint64_t refcount_block_offset =
+ s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
+
+ /* If it's already there, we're done */
+ if (refcount_block_offset) {
+ return load_refcount_block(bs, refcount_block_offset,
+ (void**) refcount_block);
+ }
+ }
+
+ /*
+ * If we came here, we need to allocate something. Something is at least
+ * a cluster for the new refcount block. It may also include a new refcount
+ * table if the old refcount table is too small.
+ *
+ * Note that allocating clusters here needs some special care:
+ *
+ * - We can't use the normal qcow2_alloc_clusters(), it would try to
+ * increase the refcount and very likely we would end up with an endless
+ * recursion. Instead we must place the refcount blocks in a way that
+ * they can describe them themselves.
+ *
+ * - We need to consider that at this point we are inside update_refcounts
+ * and doing the initial refcount increase. This means that some clusters
+ * have already been allocated by the caller, but their refcount isn't
+ * accurate yet. free_cluster_index tells us where this allocation ends
+ * as long as we don't overwrite it by freeing clusters.
+ *
+ * - alloc_clusters_noref and qcow2_free_clusters may load a different
+ * refcount block into the cache
+ */
+
+ /* NULL means "no cache entry held"; the fail_block path relies on it */
+ *refcount_block = NULL;
+
+ /* We write to the refcount table, so we might depend on L2 tables */
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Allocate the refcount block itself and mark it as used */
+ int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
+ if (new_block < 0) {
+ return new_block;
+ }
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
+ " at %" PRIx64 "\n",
+ refcount_table_index, cluster_index << s->cluster_bits, new_block);
+#endif
+
+ if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
+ /* Zero the new refcount block before updating it */
+ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+ (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ memset(*refcount_block, 0, s->cluster_size);
+
+ /* The block describes itself, need to update the cache */
+ int block_index = (new_block >> s->cluster_bits) &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+ (*refcount_block)[block_index] = cpu_to_be16(1);
+ } else {
+ /* Described somewhere else. This can recurse at most twice before we
+ * arrive at a block that describes itself. */
+ ret = update_refcount(bs, new_block, s->cluster_size, 1,
+ QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* Initialize the new refcount block only after updating its refcount,
+ * update_refcount uses the refcount cache itself */
+ ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
+ (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ memset(*refcount_block, 0, s->cluster_size);
+ }
+
+ /* Now the new refcount block needs to be written to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* If the refcount table is big enough, just hook the block up there */
+ if (refcount_table_index < s->refcount_table_size) {
+ uint64_t data64 = cpu_to_be64(new_block);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
+ ret = bdrv_pwrite_sync(bs->file,
+ s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
+ &data64, sizeof(data64));
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* Success: *refcount_block still refers to the cache entry */
+ s->refcount_table[refcount_table_index] = new_block;
+ return 0;
+ }
+
+ /* The table must grow: give the new block back to the cache for now, it
+ * is loaded again at the very end of this function */
+ ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /*
+ * If we come here, we need to grow the refcount table. Again, a new
+ * refcount table needs some space and we can't simply allocate to avoid
+ * endless recursion.
+ *
+ * Therefore let's grab new refcount blocks at the end of the image, which
+ * will describe themselves and the new refcount table. This way we can
+ * reference them only in the new table and do the switch to the new
+ * refcount table at once without producing an inconsistent state in
+ * between.
+ */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
+
+ /* Calculate the number of refcount blocks needed so far */
+ uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
+ uint64_t blocks_used = (s->free_cluster_index +
+ refcount_block_clusters - 1) / refcount_block_clusters;
+
+ /* And now we need at least one block more for the new metadata */
+ uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
+ uint64_t last_table_size;
+ uint64_t blocks_clusters;
+ /* The table size and the number of blocks needed to describe the new
+ * metadata depend on each other: iterate until the size stops changing */
+ do {
+ uint64_t table_clusters =
+ size_to_clusters(s, table_size * sizeof(uint64_t));
+ blocks_clusters = 1 +
+ ((table_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters);
+ uint64_t meta_clusters = table_clusters + blocks_clusters;
+
+ last_table_size = table_size;
+ table_size = next_refcount_table_size(s, blocks_used +
+ ((meta_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters));
+
+ } while (last_table_size != table_size);
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
+ s->refcount_table_size, table_size);
+#endif
+
+ /* Create the new refcount table and blocks */
+ /* Layout: the new refcount blocks start at meta_offset and the new table
+ * follows directly behind them at table_offset */
+ uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
+ s->cluster_size;
+ uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
+ uint16_t *new_blocks = g_malloc0(blocks_clusters * s->cluster_size);
+ uint64_t *new_table = g_malloc0(table_size * sizeof(uint64_t));
+
+ assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
+
+ /* Fill the new refcount table */
+ memcpy(new_table, s->refcount_table,
+ s->refcount_table_size * sizeof(uint64_t));
+ new_table[refcount_table_index] = new_block;
+
+ int i;
+ for (i = 0; i < blocks_clusters; i++) {
+ new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
+ }
+
+ /* Fill the refcount blocks */
+ /* One reference for every cluster of the new blocks and the new table */
+ uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
+ int block = 0;
+ for (i = 0; i < table_clusters + blocks_clusters; i++) {
+ new_blocks[block++] = cpu_to_be16(1);
+ }
+
+ /* Write refcount blocks to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
+ ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+ blocks_clusters * s->cluster_size);
+ g_free(new_blocks);
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* Write refcount table to disk */
+ /* The table is converted to big-endian in place for the write ... */
+ for(i = 0; i < table_size; i++) {
+ cpu_to_be64s(&new_table[i]);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+ table_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* ... and converted back to host byte order for in-memory use */
+ for(i = 0; i < table_size; i++) {
+ be64_to_cpus(&new_table[i]);
+ }
+
+ /* Hook up the new refcount table in the qcow2 header */
+ /* 8 bytes of table offset followed by 4 bytes of table size in clusters,
+ * written with a single call starting at the refcount_table_offset field */
+ uint8_t data[12];
+ cpu_to_be64w((uint64_t*)data, table_offset);
+ cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
+ data, sizeof(data));
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* And switch it in memory */
+ uint64_t old_table_offset = s->refcount_table_offset;
+ uint64_t old_table_size = s->refcount_table_size;
+
+ g_free(s->refcount_table);
+ s->refcount_table = new_table;
+ s->refcount_table_size = table_size;
+ s->refcount_table_offset = table_offset;
+
+ /* Free old table. Remember, we must not change free_cluster_index */
+ uint64_t old_free_cluster_index = s->free_cluster_index;
+ qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ s->free_cluster_index = old_free_cluster_index;
+
+ /* new_table now belongs to s, so a failure here must not free it */
+ ret = load_refcount_block(bs, new_block, (void**) refcount_block);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+
+fail_table:
+ g_free(new_table);
+fail_block:
+ /* Give back the cache entry if one is still held */
+ if (*refcount_block != NULL) {
+ qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+ }
+ return ret;
+}
+
+/*
+ * Empties the queue of pending discard regions (s->discards).
+ *
+ * Every queued region is removed and freed. The discard itself is only
+ * passed on to bs->file when ret is non-negative, i.e. when the operation
+ * that queued the regions did not fail.
+ */
+void qcow2_process_discards(BlockDriverState *bs, int ret)
+{
+    BDRVQcowState *s = bs->opaque;
+    Qcow2DiscardRegion *region, *following;
+    const bool issue_discard = (ret >= 0);
+
+    QTAILQ_FOREACH_SAFE(region, &s->discards, next, following) {
+        QTAILQ_REMOVE(&s->discards, region, next);
+
+        if (issue_discard) {
+            /* Discard is optional, ignore the return value */
+            bdrv_discard(bs->file,
+                         region->offset >> BDRV_SECTOR_BITS,
+                         region->bytes >> BDRV_SECTOR_BITS);
+        }
+
+        g_free(region);
+    }
+}
+
+/*
+ * Queues the byte range [offset, offset + length) on s->discards.
+ *
+ * If the range directly touches a region that is already queued, that region
+ * is extended instead of adding a new one. Afterwards any other queued
+ * region that has become adjacent to the resulting region is merged into it.
+ * Overlapping ranges are not expected and trip an assertion.
+ */
+static void update_refcount_discard(BlockDriverState *bs,
+                                    uint64_t offset, uint64_t length)
+{
+    BDRVQcowState *s = bs->opaque;
+    Qcow2DiscardRegion *d, *p, *next;
+
+    QTAILQ_FOREACH(d, &s->discards, next) {
+        uint64_t new_start = MIN(offset, d->offset);
+        uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
+
+        /* The union is no larger than the sum: the ranges touch */
+        if (new_end - new_start <= length + d->bytes) {
+            /* There can't be any overlap, areas ending up here have no
+             * references any more and therefore shouldn't get freed another
+             * time. */
+            assert(d->bytes + length == new_end - new_start);
+            d->offset = new_start;
+            d->bytes = new_end - new_start;
+            goto found;
+        }
+    }
+
+    d = g_malloc(sizeof(*d));
+    *d = (Qcow2DiscardRegion) {
+        .bs     = bs,
+        .offset = offset,
+        .bytes  = length,
+    };
+    QTAILQ_INSERT_TAIL(&s->discards, d, next);
+
+found:
+    /* Merge discard requests if they are adjacent now */
+    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
+        if (p == d
+            || p->offset > d->offset + d->bytes
+            || d->offset > p->offset + p->bytes)
+        {
+            continue;
+        }
+
+        /* Still no overlap possible */
+        assert(p->offset == d->offset + d->bytes
+            || d->offset == p->offset + p->bytes);
+
+        QTAILQ_REMOVE(&s->discards, p, next);
+        d->offset = MIN(d->offset, p->offset);
+        d->bytes += p->bytes;
+        /* p is off the queue and fully absorbed by d: release it, otherwise
+         * every merge leaks one region */
+        g_free(p);
+    }
+}
+
+/*
+ * Adds addend to the refcount of every cluster touched by the byte range
+ * [offset, offset + length).
+ *
+ * A negative length is rejected with -EINVAL and a zero length is a no-op.
+ * A refcount that would leave the range 0..0xffff fails with -EINVAL.
+ * When a refcount drops to 0, s->free_cluster_index is lowered to that
+ * cluster if necessary and, if s->discard_passthrough[type] is set, the
+ * cluster is queued for discard.
+ *
+ * On failure an attempt is made to undo the updates done so far.
+ *
+ * Returns 0 on success or -errno on failure.
+ */
+/* XXX: cache several refcount block clusters ? */
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
+{
+ BDRVQcowState *s = bs->opaque;
+ int64_t start, last, cluster_offset;
+ uint16_t *refcount_block = NULL;
+ int64_t old_table_index = -1;
+ int ret;
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
+ offset, length, addend);
+#endif
+ if (length < 0) {
+ return -EINVAL;
+ } else if (length == 0) {
+ return 0;
+ }
+
+ /* Decrements register a dependency between the refcount block cache and
+ * the L2 table cache */
+ if (addend < 0) {
+ qcow2_cache_set_dependency(bs, s->refcount_block_cache,
+ s->l2_table_cache);
+ }
+
+ /* Offsets of the first and the last cluster touched by the range */
+ start = offset & ~(s->cluster_size - 1);
+ last = (offset + length - 1) & ~(s->cluster_size - 1);
+ for(cluster_offset = start; cluster_offset <= last;
+ cluster_offset += s->cluster_size)
+ {
+ int block_index, refcount;
+ int64_t cluster_index = cluster_offset >> s->cluster_bits;
+ int64_t table_index =
+ cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
+
+ /* Load the refcount block and allocate it if needed */
+ /* The current block is kept as long as consecutive clusters share it */
+ if (table_index != old_table_index) {
+ if (refcount_block) {
+ ret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ old_table_index = table_index;
+
+ qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
+
+ /* we can update the count and save it */
+ block_index = cluster_index &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+
+ refcount = be16_to_cpu(refcount_block[block_index]);
+ refcount += addend;
+ /* Refcounts are stored as 16-bit values; reject under- and overflow */
+ if (refcount < 0 || refcount > 0xffff) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (refcount == 0 && cluster_index < s->free_cluster_index) {
+ s->free_cluster_index = cluster_index;
+ }
+ refcount_block[block_index] = cpu_to_be16(refcount);
+
+ if (refcount == 0 && s->discard_passthrough[type]) {
+ update_refcount_discard(bs, cluster_offset, s->cluster_size);
+ }
+ }
+
+ ret = 0;
+fail:
+ /* Reached on success as well; queued discards are processed right away
+ * unless the caller has asked for them to be collected */
+ if (!s->cache_discards) {
+ qcow2_process_discards(bs, ret);
+ }
+
+ /* Write last changed block to disk */
+ if (refcount_block) {
+ int wret;
+ wret = qcow2_cache_put(bs, s->refcount_block_cache,
+ (void**) &refcount_block);
+ if (wret < 0) {
+ return ret < 0 ? ret : wret;
+ }
+ }
+
+ /*
+ * Try to undo any updates if an error is returned (This may succeed in
+ * some cases like ENOSPC for allocating a new refcount block)
+ */
+ if (ret < 0) {
+ /* Only the range before the failing cluster was changed; the result
+ * of the undo is deliberately discarded */
+ int dummy;
+ dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
+ QCOW2_DISCARD_NEVER);
+ (void)dummy;
+ }
+
+ return ret;
+}
+
+/*
+ * Adjusts the refcount of a single cluster by addend (which must be 1 or -1)
+ * and reads the result back.
+ *
+ * Returns the new refcount of the cluster if non-negative, or -errno if the
+ * update or the read-back failed.
+ */
+static int update_cluster_refcount(BlockDriverState *bs,
+                                   int64_t cluster_index,
+                                   int addend,
+                                   enum qcow2_discard_type type)
+{
+    BDRVQcowState *s = bs->opaque;
+    int rc = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
+                             type);
+
+    return rc < 0 ? rc : get_refcount(bs, cluster_index);
+}
+
+
+
+/*********************************************************/
+/* cluster allocation functions */
+
+
+
+/*
+ * Finds a run of contiguous clusters with refcount 0 that is large enough
+ * for size bytes, scanning upwards from s->free_cluster_index. The refcounts
+ * are not changed; s->free_cluster_index is left just behind the run.
+ *
+ * Returns the byte offset of the run, or < 0 if a refcount lookup failed.
+ */
+static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int nb_clusters = size_to_clusters(s, size);
+    int run = 0;
+
+    while (run < nb_clusters) {
+        int64_t candidate = s->free_cluster_index++;
+        int refcount = get_refcount(bs, candidate);
+
+        if (refcount < 0) {
+            return refcount;
+        }
+
+        /* A cluster that is in use breaks the run; start counting afresh */
+        run = (refcount == 0) ? run + 1 : 0;
+    }
+#ifdef DEBUG_ALLOC2
+    fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
+            size,
+            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
+#endif
+    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
+}
+
+/*
+ * Allocates contiguous clusters for size bytes and takes one reference on
+ * each of them.
+ *
+ * Returns the byte offset of the allocation, or -errno if either the search
+ * or the refcount update failed.
+ */
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
+{
+    int64_t first;
+    int rc;
+
+    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
+    first = alloc_clusters_noref(bs, size);
+    if (first >= 0) {
+        rc = update_refcount(bs, first, size, 1, QCOW2_DISCARD_NEVER);
+        if (rc < 0) {
+            first = rc;
+        }
+    }
+
+    return first;
+}
+
+/*
+ * Tries to allocate up to nb_clusters clusters at the fixed byte offset
+ * offset. Only the leading run of clusters whose refcount is currently 0 is
+ * taken; one reference is added to each of them.
+ *
+ * s->free_cluster_index is overridden only while the refcounts are updated
+ * and is put back to its previous value on success as well as on failure.
+ *
+ * Returns the number of clusters allocated (possibly 0), or -errno.
+ */
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+    int nb_clusters)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t cluster_index;
+    uint64_t old_free_cluster_index;
+    int i, refcount, ret;
+
+    /* Check how many clusters there are free */
+    cluster_index = offset >> s->cluster_bits;
+    for (i = 0; i < nb_clusters; i++) {
+        refcount = get_refcount(bs, cluster_index++);
+
+        if (refcount < 0) {
+            return refcount;
+        } else if (refcount != 0) {
+            break;
+        }
+    }
+
+    /* And then allocate them */
+    old_free_cluster_index = s->free_cluster_index;
+    /* NOTE(review): cluster_index has already been advanced by the loop, so
+     * this lies beyond the end of the run; kept as is - confirm the intent */
+    s->free_cluster_index = cluster_index + i;
+
+    /* Widen before shifting so that a large run cannot overflow an int */
+    ret = update_refcount(bs, offset, (int64_t)i << s->cluster_bits, 1,
+                          QCOW2_DISCARD_NEVER);
+
+    /* Restore on every path: the error path used to leave the temporary
+     * value in place */
+    s->free_cluster_index = old_free_cluster_index;
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    return i;
+}
+
+/*
+ * Only used to allocate compressed sectors. We try to allocate contiguous
+ * sectors. size must be <= cluster_size.
+ *
+ * Space is handed out from s->free_byte_offset. Whenever an allocation
+ * shares a cluster with an earlier one, that cluster gets an additional
+ * reference. If that refcount update fails, the error is returned and
+ * s->free_byte_offset is left untouched by the failed step.
+ *
+ * Returns the byte offset of the allocation, or -errno on failure.
+ */
+int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t offset, cluster_offset;
+    int free_in_cluster;
+    int ret;
+
+    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
+    assert(size > 0 && size <= s->cluster_size);
+    if (s->free_byte_offset == 0) {
+        offset = qcow2_alloc_clusters(bs, s->cluster_size);
+        if (offset < 0) {
+            return offset;
+        }
+        s->free_byte_offset = offset;
+    }
+ redo:
+    free_in_cluster = s->cluster_size -
+        (s->free_byte_offset & (s->cluster_size - 1));
+    if (size <= free_in_cluster) {
+        /* enough space in current cluster */
+        offset = s->free_byte_offset;
+
+        /* Not at the start of the cluster: the cluster is shared with an
+         * earlier allocation and needs one more reference. Do this before
+         * touching free_byte_offset so that a failure changes nothing. */
+        if ((offset & (s->cluster_size - 1)) != 0) {
+            ret = update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+                                          QCOW2_DISCARD_NEVER);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        s->free_byte_offset += size;
+        free_in_cluster -= size;
+        if (free_in_cluster == 0) {
+            s->free_byte_offset = 0;
+        }
+    } else {
+        offset = qcow2_alloc_clusters(bs, s->cluster_size);
+        if (offset < 0) {
+            return offset;
+        }
+        cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
+        if ((cluster_offset + s->cluster_size) == offset) {
+            /* we are lucky: contiguous data */
+            int64_t new_cluster = offset;
+
+            offset = s->free_byte_offset;
+            ret = update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
+                                          QCOW2_DISCARD_NEVER);
+            if (ret < 0) {
+                /* Give the freshly allocated cluster back instead of
+                 * leaking it */
+                qcow2_free_clusters(bs, new_cluster, s->cluster_size,
+                                    QCOW2_DISCARD_NEVER);
+                return ret;
+            }
+            s->free_byte_offset += size;
+        } else {
+            s->free_byte_offset = offset;
+            goto redo;
+        }
+    }
+
+    /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
+     * or explicitly by update_cluster_refcount().  Refcount blocks must be
+     * flushed before the caller's L2 table updates.
+     */
+    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
+    return offset;
+}
+
+/*
+ * Drops one reference from every cluster in the byte range
+ * [offset, offset + size). A failure is only reported on stderr; nothing is
+ * returned to the caller.
+ */
+void qcow2_free_clusters(BlockDriverState *bs,
+                          int64_t offset, int64_t size,
+                          enum qcow2_discard_type type)
+{
+    int rc;
+
+    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
+    rc = update_refcount(bs, offset, size, -1, type);
+    if (rc >= 0) {
+        return;
+    }
+
+    fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-rc));
+    /* TODO Remember the clusters to free them later and avoid leaking */
+}
+
+/*
+ * Releases the clusters referenced by an L2 entry, whatever the entry's type.
+ *
+ * For a compressed entry the range is derived from the sector count and the
+ * offset encoded in the entry. For a normal entry nb_clusters clusters are
+ * freed starting at the entry's offset. Unallocated and zero entries own no
+ * clusters, so nothing happens. Any other cluster type aborts.
+ */
+void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
+    int nb_clusters, enum qcow2_discard_type type)
+{
+    BDRVQcowState *s = bs->opaque;
+    const int cluster_type = qcow2_get_cluster_type(l2_entry);
+
+    if (cluster_type == QCOW2_CLUSTER_COMPRESSED) {
+        int nb_csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
+
+        qcow2_free_clusters(bs,
+                            (l2_entry & s->cluster_offset_mask) & ~511,
+                            nb_csectors * 512, type);
+    } else if (cluster_type == QCOW2_CLUSTER_NORMAL) {
+        qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
+                            nb_clusters << s->cluster_bits, type);
+    } else if (cluster_type == QCOW2_CLUSTER_UNALLOCATED ||
+               cluster_type == QCOW2_CLUSTER_ZERO) {
+        /* Nothing to release */
+    } else {
+        abort();
+    }
+}
+
+
+
+/*********************************************************/
+/* snapshots and image creation */
+
+
+
+/*
+ * Update the refcounts of all clusters reachable from an L1 table and bring
+ * the QCOW_OFLAG_COPIED flag of its L1 and L2 entries up to date.
+ *
+ * l1_table_offset, l1_size: the L1 table to walk. If l1_table_offset equals
+ *     s->l1_table_offset, the in-memory s->l1_table is used and the table is
+ *     not read from bs->file.
+ * addend: value added to the refcount of every referenced cluster. With 0
+ *     the refcounts are only read and the COPIED flags recomputed.
+ *
+ * An entry ends up with QCOW_OFLAG_COPIED exactly when the refcount seen for
+ * it is 1; compressed clusters never get the flag.
+ *
+ * Returns a negative value on failure; otherwise the result of the final
+ * bdrv_flush(), or of writing back the L1 table when it was modified and
+ * addend >= 0.
+ */
+int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+    int64_t l1_table_offset, int l1_size, int addend)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, l1_allocated;
+    int64_t old_offset, old_l2_offset;
+    int i, j, l1_modified = 0, nb_csectors, refcount;
+    int ret;
+
+    l2_table = NULL;
+    l1_table = NULL;
+    l1_size2 = l1_size * sizeof(uint64_t);
+
+    s->cache_discards = true;
+
+    /* WARNING: qcow2_snapshot_goto relies on this function not using the
+     * l1_table_offset when it is the current s->l1_table_offset! Be careful
+     * when changing this! */
+    if (l1_table_offset != s->l1_table_offset) {
+        l1_table = g_malloc0(align_offset(l1_size2, 512));
+        l1_allocated = 1;
+
+        ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        for (i = 0; i < l1_size; i++) {
+            be64_to_cpus(&l1_table[i]);
+        }
+    } else {
+        assert(l1_size == s->l1_size);
+        l1_table = s->l1_table;
+        l1_allocated = 0;
+    }
+
+    for (i = 0; i < l1_size; i++) {
+        l2_offset = l1_table[i];
+        if (l2_offset) {
+            old_l2_offset = l2_offset;
+            l2_offset &= L1E_OFFSET_MASK;
+
+            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+                (void**) &l2_table);
+            if (ret < 0) {
+                goto fail;
+            }
+
+            for (j = 0; j < s->l2_size; j++) {
+                offset = be64_to_cpu(l2_table[j]);
+                if (offset != 0) {
+                    old_offset = offset;
+                    offset &= ~QCOW_OFLAG_COPIED;
+                    if (offset & QCOW_OFLAG_COMPRESSED) {
+                        nb_csectors = ((offset >> s->csize_shift) &
+                                       s->csize_mask) + 1;
+                        if (addend != 0) {
+                            /* Use the function-level ret here: a local
+                             * declaration would shadow it and the fail path
+                             * would report success. */
+                            ret = update_refcount(bs,
+                                (offset & s->cluster_offset_mask) & ~511,
+                                nb_csectors * 512, addend,
+                                QCOW2_DISCARD_SNAPSHOT);
+                            if (ret < 0) {
+                                goto fail;
+                            }
+                        }
+                        /* compressed clusters are never modified */
+                        refcount = 2;
+                    } else {
+                        uint64_t cluster_index =
+                            (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
+                        if (addend != 0) {
+                            refcount = update_cluster_refcount(bs,
+                                cluster_index, addend,
+                                QCOW2_DISCARD_SNAPSHOT);
+                        } else {
+                            refcount = get_refcount(bs, cluster_index);
+                        }
+
+                        if (refcount < 0) {
+                            ret = refcount;
+                            goto fail;
+                        }
+                    }
+
+                    if (refcount == 1) {
+                        offset |= QCOW_OFLAG_COPIED;
+                    }
+                    if (offset != old_offset) {
+                        if (addend > 0) {
+                            qcow2_cache_set_dependency(bs, s->l2_table_cache,
+                                s->refcount_block_cache);
+                        }
+                        l2_table[j] = cpu_to_be64(offset);
+                        qcow2_cache_entry_mark_dirty(s->l2_table_cache,
+                                                     l2_table);
+                    }
+                }
+            }
+
+            ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+            if (ret < 0) {
+                goto fail;
+            }
+
+
+            /* Now account for the cluster that holds the L2 table itself */
+            if (addend != 0) {
+                refcount = update_cluster_refcount(bs,
+                    l2_offset >> s->cluster_bits, addend,
+                    QCOW2_DISCARD_SNAPSHOT);
+            } else {
+                refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+            }
+            if (refcount < 0) {
+                ret = refcount;
+                goto fail;
+            } else if (refcount == 1) {
+                l2_offset |= QCOW_OFLAG_COPIED;
+            }
+            if (l2_offset != old_l2_offset) {
+                l1_table[i] = l2_offset;
+                l1_modified = 1;
+            }
+        }
+    }
+
+    ret = bdrv_flush(bs);
+fail:
+    /* Reached on success as well; ret tells the two cases apart */
+    if (l2_table) {
+        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
+    }
+
+    s->cache_discards = false;
+    qcow2_process_discards(bs, ret);
+
+    /* Update L1 only if it isn't deleted anyway (addend = -1) */
+    if (ret == 0 && addend >= 0 && l1_modified) {
+        /* The table is byte-swapped in place for the write and swapped back
+         * afterwards because it may be the live s->l1_table. */
+        for (i = 0; i < l1_size; i++) {
+            cpu_to_be64s(&l1_table[i]);
+        }
+
+        ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+
+        for (i = 0; i < l1_size; i++) {
+            be64_to_cpus(&l1_table[i]);
+        }
+    }
+    if (l1_allocated)
+        g_free(l1_table);
+    return ret;
+}
+
+
+
+
+/*********************************************************/
+/* refcount checking functions */
+
+
+
+/*
+ * Increases the refcount for a range of clusters in a given refcount table.
+ * This is used to construct a temporary refcount table out of L1 and L2 tables
+ * which can be compared to the refcount table saved in the image.
+ *
+ * refcount_table has refcount_table_size entries, one per cluster. Every
+ * cluster touched by the byte range [offset, offset + size) is incremented;
+ * a size <= 0 does nothing.
+ *
+ * Modifies the number of errors in res: a negative cluster index or a counter
+ * that wraps to 0 counts as a corruption, a cluster beyond the end of the
+ * table as a check error.
+ */
+static void inc_refcounts(BlockDriverState *bs,
+                          BdrvCheckResult *res,
+                          uint16_t *refcount_table,
+                          int refcount_table_size,
+                          int64_t offset, int64_t size)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t start, last, cluster_offset;
+    /* 64 bits wide: a cluster index derived from a corrupted entry must not
+     * be truncated into the valid range of the table */
+    int64_t k;
+
+    if (size <= 0)
+        return;
+
+    start = offset & ~(s->cluster_size - 1);
+    last = (offset + size - 1) & ~(s->cluster_size - 1);
+    for (cluster_offset = start; cluster_offset <= last;
+         cluster_offset += s->cluster_size) {
+        k = cluster_offset >> s->cluster_bits;
+        if (k < 0) {
+            fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
+                cluster_offset);
+            res->corruptions++;
+        } else if (k >= refcount_table_size) {
+            fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
+                "the end of the image file, can't properly check refcounts.\n",
+                cluster_offset);
+            res->check_errors++;
+        } else {
+            if (++refcount_table[k] == 0) {
+                fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
+                    "\n", cluster_offset);
+                res->corruptions++;
+            }
+        }
+    }
+}
+
+/* Flags for check_refcounts_l1() and check_refcounts_l2().
+ * Each value is a distinct bit: callers may OR them together and the
+ * checkers test them with '&'. */
+enum {
+ CHECK_OFLAG_COPIED = 0x1, /* check QCOW_OFLAG_COPIED matches refcount */
+ CHECK_FRAG_INFO = 0x2, /* update BlockFragInfo counters (res->bfi) */
+};
+
+/*
+ * Increases the refcount in the given refcount table for all clusters
+ * referenced in the L2 table at l2_offset. While doing so, performs some
+ * checks on L2 entries.
+ *
+ * flags is a combination of CHECK_OFLAG_COPIED and CHECK_FRAG_INFO.
+ *
+ * Problems found by the checks are printed to stderr and counted in
+ * res->corruptions; they are not reflected in the return value.
+ *
+ * Returns 0 once the whole table has been processed, or -EIO if the L2 table
+ * could not be read or a refcount lookup failed.
+ */
+static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
+ uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
+ int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l2_table, l2_entry;
+ uint64_t next_contiguous_offset = 0;
+ int i, l2_size, nb_csectors, refcount;
+
+ /* Read L2 table from disk (l2_size is the table size in bytes) */
+ l2_size = s->l2_size * sizeof(uint64_t);
+ l2_table = g_malloc(l2_size);
+
+ if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
+ goto fail;
+
+ /* Do the actual checks */
+ for(i = 0; i < s->l2_size; i++) {
+ l2_entry = be64_to_cpu(l2_table[i]);
+
+ switch (qcow2_get_cluster_type(l2_entry)) {
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* Compressed clusters don't have QCOW_OFLAG_COPIED */
+ if (l2_entry & QCOW_OFLAG_COPIED) {
+ fprintf(stderr, "ERROR: cluster %" PRId64 ": "
+ "copied flag must never be set for compressed "
+ "clusters\n", l2_entry >> s->cluster_bits);
+ l2_entry &= ~QCOW_OFLAG_COPIED;
+ res->corruptions++;
+ }
+
+ /* Mark cluster as used (the range is nb_csectors sectors of 512
+ * bytes starting at the sector-aligned offset) */
+ nb_csectors = ((l2_entry >> s->csize_shift) &
+ s->csize_mask) + 1;
+ l2_entry &= s->cluster_offset_mask;
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_entry & ~511, nb_csectors * 512);
+
+ if (flags & CHECK_FRAG_INFO) {
+ res->bfi.allocated_clusters++;
+ res->bfi.compressed_clusters++;
+
+ /* Compressed clusters are fragmented by nature. Since they
+ * take up sub-sector space but we only have sector granularity
+ * I/O we need to re-read the same sectors even for adjacent
+ * compressed clusters.
+ */
+ res->bfi.fragmented_clusters++;
+ }
+ break;
+
+ case QCOW2_CLUSTER_ZERO: /* with an offset: handled like a normal cluster */
+ if ((l2_entry & L2E_OFFSET_MASK) == 0) {
+ break;
+ }
+ /* fall through */
+
+ case QCOW2_CLUSTER_NORMAL:
+ {
+ /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+ uint64_t offset = l2_entry & L2E_OFFSET_MASK;
+
+ if (flags & CHECK_OFLAG_COPIED) {
+ refcount = get_refcount(bs, offset >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for offset %"
+ PRIx64 ": %s\n", l2_entry, strerror(-refcount));
+ goto fail;
+ }
+ if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
+ PRIx64 " refcount=%d\n", l2_entry, refcount);
+ res->corruptions++;
+ }
+ }
+
+ if (flags & CHECK_FRAG_INFO) {
+ res->bfi.allocated_clusters++;
+ if (next_contiguous_offset &&
+ offset != next_contiguous_offset) {
+ res->bfi.fragmented_clusters++;
+ }
+ next_contiguous_offset = offset + s->cluster_size;
+ }
+
+ /* Mark cluster as used */
+ inc_refcounts(bs, res, refcount_table,refcount_table_size,
+ offset, s->cluster_size);
+
+ /* Correct offsets are cluster aligned */
+ if (offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
+ "properly aligned; L2 entry corrupted.\n", offset);
+ res->corruptions++;
+ }
+ break;
+ }
+
+ case QCOW2_CLUSTER_UNALLOCATED:
+ break;
+
+ default:
+ abort();
+ }
+ }
+
+ g_free(l2_table);
+ return 0;
+
+fail:
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
+ g_free(l2_table);
+ return -EIO;
+}
+
+/*
+ * Increases the refcount for the L1 table, its L2 tables and all referenced
+ * clusters in the given refcount table. While doing so, performs some checks
+ * on L1 and L2 entries.
+ *
+ * flags is a combination of CHECK_OFLAG_COPIED and CHECK_FRAG_INFO and is
+ * passed on to check_refcounts_l2().
+ *
+ * Problems found by the checks are printed to stderr and counted in
+ * res->corruptions; they are not reflected in the return value.
+ *
+ * Returns 0 once the table has been processed, or -EIO (after incrementing
+ * res->check_errors) if the L1 table could not be read, a refcount lookup
+ * failed or check_refcounts_l2() returned an error.
+ */
+static int check_refcounts_l1(BlockDriverState *bs,
+ BdrvCheckResult *res,
+ uint16_t *refcount_table,
+ int refcount_table_size,
+ int64_t l1_table_offset, int l1_size,
+ int flags)
+{
+ BDRVQcowState *s = bs->opaque;
+ uint64_t *l1_table, l2_offset, l1_size2;
+ int i, refcount, ret;
+
+ l1_size2 = l1_size * sizeof(uint64_t);
+
+ /* Mark L1 table as used */
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l1_table_offset, l1_size2);
+
+ /* Read L1 table entries from disk (nothing to read for an empty table) */
+ if (l1_size2 == 0) {
+ l1_table = NULL;
+ } else {
+ l1_table = g_malloc(l1_size2);
+ if (bdrv_pread(bs->file, l1_table_offset,
+ l1_table, l1_size2) != l1_size2)
+ goto fail;
+ for(i = 0;i < l1_size; i++)
+ be64_to_cpus(&l1_table[i]);
+ }
+
+ /* Do the actual checks */
+ for(i = 0; i < l1_size; i++) {
+ l2_offset = l1_table[i];
+ if (l2_offset) {
+ /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
+ if (flags & CHECK_OFLAG_COPIED) {
+ refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
+ >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for l2_offset %"
+ PRIx64 ": %s\n", l2_offset, strerror(-refcount));
+ goto fail;
+ }
+ if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
+ fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
+ " refcount=%d\n", l2_offset, refcount);
+ res->corruptions++;
+ }
+ }
+
+ /* Mark L2 table as used (flag bits are masked off first) */
+ l2_offset &= L1E_OFFSET_MASK;
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_offset, s->cluster_size);
+
+ /* L2 tables are cluster aligned */
+ if (l2_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
+ "cluster aligned; L1 entry corrupted\n", l2_offset);
+ res->corruptions++;
+ }
+
+ /* Process and check L2 entries */
+ ret = check_refcounts_l2(bs, res, refcount_table,
+ refcount_table_size, l2_offset, flags);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ }
+ g_free(l1_table);
+ return 0;
+
+fail:
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+ res->check_errors++;
+ g_free(l1_table);
+ return -EIO;
+}
+
+/*
+ * Checks an image for refcount consistency.
+ *
+ * A temporary refcount table is built from everything that references
+ * clusters (header, active L1 table, snapshot L1 tables, snapshot table,
+ * refcount table and refcount blocks) and compared, cluster by cluster, with
+ * the value get_refcount() reports. Mismatches are printed to stderr and,
+ * when allowed by fix (BDRV_FIX_LEAKS / BDRV_FIX_ERRORS), repaired with
+ * update_refcount().
+ *
+ * All findings are reported through res (corruptions, leaks, check_errors,
+ * *_fixed, bfi, image_end_offset).
+ *
+ * Returns 0 when the check ran to completion and a negative value when the
+ * file length could not be determined or an L1 table could not be checked.
+ */
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+                          BdrvCheckMode fix)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t size, i, highest_cluster;
+    int nb_clusters, refcount1, refcount2;
+    QCowSnapshot *sn;
+    uint16_t *refcount_table;
+    int ret;
+
+    size = bdrv_getlength(bs->file);
+    if (size < 0) {
+        /* A negative length must not reach the allocation size below */
+        res->check_errors++;
+        return size;
+    }
+
+    nb_clusters = size_to_clusters(s, size);
+    refcount_table = g_malloc0(nb_clusters * sizeof(uint16_t));
+
+    res->bfi.total_clusters =
+        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
+
+    /* header */
+    inc_refcounts(bs, res, refcount_table, nb_clusters,
+        0, s->cluster_size);
+
+    /* current L1 table */
+    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+                             s->l1_table_offset, s->l1_size,
+                             CHECK_OFLAG_COPIED | CHECK_FRAG_INFO);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* snapshots */
+    for (i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+            sn->l1_table_offset, sn->l1_size, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+    inc_refcounts(bs, res, refcount_table, nb_clusters,
+        s->snapshots_offset, s->snapshots_size);
+
+    /* refcount data */
+    inc_refcounts(bs, res, refcount_table, nb_clusters,
+        s->refcount_table_offset,
+        s->refcount_table_size * sizeof(uint64_t));
+
+    for (i = 0; i < s->refcount_table_size; i++) {
+        uint64_t offset, cluster;
+        offset = s->refcount_table[i];
+        cluster = offset >> s->cluster_bits;
+
+        /* Refcount blocks are cluster aligned */
+        if (offset & (s->cluster_size - 1)) {
+            fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
+                "cluster aligned; refcount table entry corrupted\n", i);
+            res->corruptions++;
+            continue;
+        }
+
+        if (cluster >= nb_clusters) {
+            fprintf(stderr, "ERROR refcount block %" PRId64
+                    " is outside image\n", i);
+            res->corruptions++;
+            continue;
+        }
+
+        if (offset != 0) {
+            inc_refcounts(bs, res, refcount_table, nb_clusters,
+                offset, s->cluster_size);
+            /* Nothing else may reference a refcount block */
+            if (refcount_table[cluster] != 1) {
+                fprintf(stderr, "ERROR refcount block %" PRId64
+                    " refcount=%d\n",
+                    i, refcount_table[cluster]);
+                res->corruptions++;
+            }
+        }
+    }
+
+    /* compare ref counts */
+    for (i = 0, highest_cluster = 0; i < nb_clusters; i++) {
+        refcount1 = get_refcount(bs, i);
+        if (refcount1 < 0) {
+            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
+                i, strerror(-refcount1));
+            res->check_errors++;
+            continue;
+        }
+
+        refcount2 = refcount_table[i];
+
+        if (refcount1 > 0 || refcount2 > 0) {
+            highest_cluster = i;
+        }
+
+        if (refcount1 != refcount2) {
+
+            /* Check if we're allowed to fix the mismatch */
+            int *num_fixed = NULL;
+            if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
+                num_fixed = &res->leaks_fixed;
+            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
+                num_fixed = &res->corruptions_fixed;
+            }
+
+            fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
+                   num_fixed != NULL     ? "Repairing" :
+                   refcount1 < refcount2 ? "ERROR" :
+                                           "Leaked",
+                   i, refcount1, refcount2);
+
+            if (num_fixed) {
+                ret = update_refcount(bs, i << s->cluster_bits, 1,
+                                      refcount2 - refcount1,
+                                      QCOW2_DISCARD_ALWAYS);
+                if (ret >= 0) {
+                    (*num_fixed)++;
+                    continue;
+                }
+            }
+
+            /* Not repaired (not allowed, or the repair failed): count it */
+            if (refcount1 < refcount2) {
+                res->corruptions++;
+            } else {
+                res->leaks++;
+            }
+        }
+    }
+
+    res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
+    ret = 0;
+
+fail:
+    g_free(refcount_table);
+
+    return ret;
+}
+
diff --git a/contrib/qemu/block/qcow2-snapshot.c b/contrib/qemu/block/qcow2-snapshot.c
new file mode 100644
index 000000000..0caac9055
--- /dev/null
+++ b/contrib/qemu/block/qcow2-snapshot.c
@@ -0,0 +1,660 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
+typedef struct QEMU_PACKED QCowSnapshotHeader {
+ /* header is 8 byte aligned (the reader and writer in this file align each
+ * record's offset to 8); fields are converted with be*_to_cpu() and
+ * cpu_to_be*() when read and written, i.e. stored big-endian */
+ uint64_t l1_table_offset;
+
+ uint32_t l1_size;
+ uint16_t id_str_size; /* bytes of id_str; no terminator is stored */
+ uint16_t name_size; /* bytes of name; no terminator is stored */
+
+ uint32_t date_sec;
+ uint32_t date_nsec;
+
+ uint64_t vm_clock_nsec;
+
+ uint32_t vm_state_size; /* written as 0 when the size exceeds 32 bits */
+ uint32_t extra_data_size; /* for extension */
+ /* extra data follows */
+ /* id_str follows */
+ /* name follows */
+} QCowSnapshotHeader;
+
+typedef struct QEMU_PACKED QCowSnapshotExtraData {
+ uint64_t vm_state_size_large; /* used instead of the header's 32-bit vm_state_size when extra_data_size >= 8 */
+ uint64_t disk_size; /* used when extra_data_size >= 16; the reader otherwise falls back to the current image size */
+} QCowSnapshotExtraData;
+
+/*
+ * Release the in-memory snapshot list: the name and ID strings of every
+ * entry, then the array itself. Afterwards s->snapshots is NULL and
+ * s->nb_snapshots is 0.
+ */
+void qcow2_free_snapshots(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int idx;
+
+    for (idx = 0; idx < s->nb_snapshots; idx++) {
+        QCowSnapshot *entry = &s->snapshots[idx];
+
+        g_free(entry->name);
+        g_free(entry->id_str);
+    }
+
+    g_free(s->snapshots);
+    s->nb_snapshots = 0;
+    s->snapshots = NULL;
+}
+
+int qcow2_read_snapshots(BlockDriverState *bs) /*
+ * Reads the s->nb_snapshots records of the snapshot table that starts at
+ * s->snapshots_offset in bs->file into a newly allocated s->snapshots array
+ * and sets s->snapshots_size to the number of bytes the table occupies.
+ * With no snapshots, s->snapshots is set to NULL and s->snapshots_size to 0.
+ *
+ * Returns 0 on success. If a read fails, the partially built list is released
+ * with qcow2_free_snapshots() and the negative bdrv_pread() result is
+ * returned.
+ */
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshotHeader h;
+ QCowSnapshotExtraData extra;
+ QCowSnapshot *sn;
+ int i, id_str_size, name_size;
+ int64_t offset;
+ uint32_t extra_data_size;
+ int ret;
+
+ if (!s->nb_snapshots) {
+ s->snapshots = NULL;
+ s->snapshots_size = 0;
+ return 0;
+ }
+
+ offset = s->snapshots_offset;
+ s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
+
+ for(i = 0; i < s->nb_snapshots; i++) {
+ /* Read statically sized part of the snapshot header */
+ offset = align_offset(offset, 8);
+ ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ offset += sizeof(h);
+ sn = s->snapshots + i;
+ sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
+ sn->l1_size = be32_to_cpu(h.l1_size);
+ sn->vm_state_size = be32_to_cpu(h.vm_state_size);
+ sn->date_sec = be32_to_cpu(h.date_sec);
+ sn->date_nsec = be32_to_cpu(h.date_nsec);
+ sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
+ extra_data_size = be32_to_cpu(h.extra_data_size);
+
+ id_str_size = be16_to_cpu(h.id_str_size);
+ name_size = be16_to_cpu(h.name_size);
+
+ /* Read extra data: at most sizeof(extra) bytes are read, but offset
+ * advances by the full extra_data_size so that extra data beyond the
+ * fields known here is skipped */
+ ret = bdrv_pread(bs->file, offset, &extra,
+ MIN(sizeof(extra), extra_data_size));
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += extra_data_size;
+
+ if (extra_data_size >= 8) {
+ sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
+ }
+
+ if (extra_data_size >= 16) {
+ sn->disk_size = be64_to_cpu(extra.disk_size);
+ } else {
+ sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ }
+
+ /* Read snapshot ID (the terminating NUL is appended here) */
+ sn->id_str = g_malloc(id_str_size + 1);
+ ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += id_str_size;
+ sn->id_str[id_str_size] = '\0';
+
+ /* Read snapshot name (the terminating NUL is appended here) */
+ sn->name = g_malloc(name_size + 1);
+ ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+ if (ret < 0) {
+ goto fail;
+ }
+ offset += name_size;
+ sn->name[name_size] = '\0';
+ }
+
+ s->snapshots_size = offset - s->snapshots_offset;
+ return 0;
+
+fail:
+ qcow2_free_snapshots(bs);
+ return ret;
+}
+
+/*
+ * Write the in-memory snapshot list (s->snapshots, s->nb_snapshots) as a new
+ * snapshot table in freshly allocated clusters, point the image header at it
+ * and release the clusters of the old table.
+ *
+ * Returns 0 on success and a negative value on failure. On failure
+ * s->snapshots_offset and s->snapshots_size keep their old values and the
+ * clusters allocated for the new table are released again.
+ */
+static int qcow2_write_snapshots(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *sn;
+    QCowSnapshotHeader h;
+    QCowSnapshotExtraData extra;
+    int i, name_size, id_str_size, snapshots_size;
+    struct {
+        uint32_t nb_snapshots;
+        uint64_t snapshots_offset;
+    } QEMU_PACKED header_data;
+    int64_t offset, snapshots_offset;
+    int ret;
+
+    /* compute the size of the snapshots */
+    offset = 0;
+    for (i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        offset = align_offset(offset, 8);
+        offset += sizeof(h);
+        offset += sizeof(extra);
+        offset += strlen(sn->id_str);
+        offset += strlen(sn->name);
+    }
+    snapshots_size = offset;
+
+    /* Allocate space for the new snapshot list */
+    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
+    offset = snapshots_offset;
+    if (offset < 0) {
+        /* nothing was allocated, so there is nothing to release */
+        return offset;
+    }
+    ret = bdrv_flush(bs);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Write all snapshots to the new list */
+    for (i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        memset(&h, 0, sizeof(h));
+        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
+        h.l1_size = cpu_to_be32(sn->l1_size);
+        /* If it doesn't fit in 32 bit, older implementations should treat it
+         * as a disk-only snapshot rather than truncate the VM state */
+        if (sn->vm_state_size <= 0xffffffff) {
+            h.vm_state_size = cpu_to_be32(sn->vm_state_size);
+        }
+        h.date_sec = cpu_to_be32(sn->date_sec);
+        h.date_nsec = cpu_to_be32(sn->date_nsec);
+        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
+        h.extra_data_size = cpu_to_be32(sizeof(extra));
+
+        memset(&extra, 0, sizeof(extra));
+        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
+        extra.disk_size = cpu_to_be64(sn->disk_size);
+
+        id_str_size = strlen(sn->id_str);
+        name_size = strlen(sn->name);
+        h.id_str_size = cpu_to_be16(id_str_size);
+        h.name_size = cpu_to_be16(name_size);
+        offset = align_offset(offset, 8);
+
+        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += sizeof(h);
+
+        ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += sizeof(extra);
+
+        ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += id_str_size;
+
+        ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += name_size;
+    }
+
+    /*
+     * Update the header to point to the new snapshot table. This requires the
+     * new table and its refcounts to be stable on disk.
+     */
+    ret = bdrv_flush(bs);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* nb_snapshots and snapshots_offset are written with a single request,
+     * so they must be adjacent in the header */
+    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
+        offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
+
+    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
+    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
+
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+                           &header_data, sizeof(header_data));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* free the old snapshot table */
+    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
+                        QCOW2_DISCARD_SNAPSHOT);
+    s->snapshots_offset = snapshots_offset;
+    s->snapshots_size = snapshots_size;
+    return 0;
+
+fail:
+    /* Don't leak the clusters that were allocated for the new table */
+    qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
+                        QCOW2_DISCARD_ALWAYS);
+    return ret;
+}
+
+/*
+ * Write a new snapshot ID into id_str (a buffer of id_str_size bytes): the
+ * decimal representation of one more than the largest numeric value that
+ * strtoul() parses from the existing snapshot IDs.
+ *
+ * The arithmetic is done in unsigned long, the type strtoul() returns, so
+ * large IDs are not truncated and the increment cannot overflow a signed
+ * integer.
+ */
+static void find_new_snapshot_id(BlockDriverState *bs,
+                                 char *id_str, int id_str_size)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *sn;
+    int i;
+    unsigned long id, id_max = 0;
+
+    for (i = 0; i < s->nb_snapshots; i++) {
+        sn = s->snapshots + i;
+        id = strtoul(sn->id_str, NULL, 10);
+        if (id > id_max) {
+            id_max = id;
+        }
+    }
+    snprintf(id_str, id_str_size, "%lu", id_max + 1);
+}
+
+/*
+ * Return the index in s->snapshots of the first snapshot whose ID string
+ * equals id_str, or -1 if there is none.
+ */
+static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
+{
+    BDRVQcowState *s = bs->opaque;
+    int idx = 0;
+
+    while (idx < s->nb_snapshots) {
+        if (strcmp(s->snapshots[idx].id_str, id_str) == 0) {
+            return idx;
+        }
+        idx++;
+    }
+
+    return -1;
+}
+
+/*
+ * Look a snapshot up by ID first and, if no ID matches, by name.
+ * Returns its index in s->snapshots, or -1 if neither matches.
+ */
+static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
+{
+    BDRVQcowState *s = bs->opaque;
+    int idx = find_snapshot_by_id(bs, name);
+
+    if (idx >= 0) {
+        return idx;
+    }
+
+    for (idx = 0; idx < s->nb_snapshots; idx++) {
+        if (strcmp(s->snapshots[idx].name, name) == 0) {
+            return idx;
+        }
+    }
+
+    return -1;
+}
+
+/*
+ * Create an internal snapshot described by sn_info.
+ *
+ * If no id is provided (sn_info->id_str is empty), a new one is constructed
+ * and stored in sn_info->id_str. The current L1 table is copied to newly
+ * allocated clusters, the refcounts of everything it references are
+ * increased, and the snapshot is appended to the snapshot table.
+ *
+ * Returns 0 on success, -EEXIST if a snapshot with that ID already exists,
+ * or the negative result of the failing step. On failure the in-memory
+ * snapshot list and its count are left as they were before the call.
+ */
+int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *new_snapshot_list = NULL;
+    QCowSnapshot *old_snapshot_list = NULL;
+    QCowSnapshot sn1, *sn = &sn1;
+    int i, ret;
+    uint64_t *l1_table = NULL;
+    int64_t l1_table_offset;
+
+    memset(sn, 0, sizeof(*sn));
+
+    /* Generate an ID if it wasn't passed */
+    if (sn_info->id_str[0] == '\0') {
+        find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
+    }
+
+    /* Check that the ID is unique */
+    if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
+        return -EEXIST;
+    }
+
+    /* Populate sn with passed data */
+    sn->id_str = g_strdup(sn_info->id_str);
+    sn->name = g_strdup(sn_info->name);
+
+    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+    sn->vm_state_size = sn_info->vm_state_size;
+    sn->date_sec = sn_info->date_sec;
+    sn->date_nsec = sn_info->date_nsec;
+    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
+
+    /* Allocate the L1 table of the snapshot and copy the current one there. */
+    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+    if (l1_table_offset < 0) {
+        ret = l1_table_offset;
+        goto fail;
+    }
+
+    sn->l1_table_offset = l1_table_offset;
+    sn->l1_size = s->l1_size;
+
+    l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
+    for (i = 0; i < s->l1_size; i++) {
+        l1_table[i] = cpu_to_be64(s->l1_table[i]);
+    }
+
+    ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+                      s->l1_size * sizeof(uint64_t));
+    if (ret < 0) {
+        goto fail;
+    }
+
+    g_free(l1_table);
+    l1_table = NULL;
+
+    /*
+     * Increase the refcounts of all clusters and make sure everything is
+     * stable on disk before updating the snapshot table to contain a pointer
+     * to the new L1 table.
+     */
+    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Append the new snapshot to the snapshot list */
+    new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
+    if (s->snapshots) {
+        memcpy(new_snapshot_list, s->snapshots,
+               s->nb_snapshots * sizeof(QCowSnapshot));
+        old_snapshot_list = s->snapshots;
+    }
+    s->snapshots = new_snapshot_list;
+    s->snapshots[s->nb_snapshots++] = *sn;
+
+    ret = qcow2_write_snapshots(bs);
+    if (ret < 0) {
+        /* Roll back both the list and its length; otherwise nb_snapshots
+         * would describe one more entry than the restored array holds. */
+        g_free(s->snapshots);
+        s->snapshots = old_snapshot_list;
+        s->nb_snapshots--;
+        goto fail;
+    }
+
+    g_free(old_snapshot_list);
+
+#ifdef DEBUG_ALLOC
+    {
+      BdrvCheckResult result = {0};
+      qcow2_check_refcounts(bs, &result, 0);
+    }
+#endif
+    return 0;
+
+fail:
+    g_free(sn->id_str);
+    g_free(sn->name);
+    g_free(l1_table);
+
+    return ret;
+}
+
+/* copy the snapshot 'snapshot_id' (matched by ID first, then by name) into
+ * the current disk image
+ *
+ * Returns 0 on success, -ENOENT if no such snapshot exists, -ENOTSUP if the
+ * snapshot's disk size differs from the current one, or the negative result
+ * of the failing step otherwise. */
+int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot *sn;
+ int i, snapshot_index;
+ int cur_l1_bytes, sn_l1_bytes;
+ int ret;
+ uint64_t *sn_l1_table = NULL;
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = &s->snapshots[snapshot_index];
+
+ if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
+ error_report("qcow2: Loading snapshots with different disk "
+ "size is not implemented");
+ ret = -ENOTSUP;
+ goto fail;
+ }
+
+ /*
+ * Make sure that the current L1 table is big enough to contain the whole
+ * L1 table of the snapshot. If the snapshot L1 table is smaller, the
+ * current one must be padded with zeros.
+ */
+ ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ cur_l1_bytes = s->l1_size * sizeof(uint64_t);
+ sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
+
+ /*
+ * Copy the snapshot L1 table to the current L1 table.
+ *
+ * Before overwriting the old current L1 table on disk, make sure to
+ * increase all refcounts for the clusters referenced by the new one.
+ * Decrease the refcount referenced by the old one only when the L1
+ * table is overwritten.
+ *
+ * The buffer is zero-filled and cur_l1_bytes long; only sn_l1_bytes are
+ * read into it, so any remainder stays zero (the padding mentioned above).
+ */
+ sn_l1_table = g_malloc0(cur_l1_bytes);
+
+ ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
+ sn->l1_size, 1);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+ cur_l1_bytes);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /*
+ * Decrease refcount of clusters of current L1 table.
+ *
+ * At this point, the in-memory s->l1_table points to the old L1 table,
+ * whereas on disk we already have the new one.
+ *
+ * qcow2_update_snapshot_refcount special cases the current L1 table to use
+ * the in-memory data instead of really using the offset to load a new one,
+ * which is why this works.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
+ s->l1_size, -1);
+
+ /*
+ * Now update the in-memory L1 table to be in sync with the on-disk one. We
+ * need to do this even if updating refcounts failed.
+ */
+ for(i = 0;i < s->l1_size; i++) {
+ s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
+ }
+
+ if (ret < 0) {
+ goto fail;
+ }
+
+ g_free(sn_l1_table);
+ sn_l1_table = NULL;
+
+ /*
+ * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
+ * when we decreased the refcount of the old snapshot).
+ */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+
+fail:
+ g_free(sn_l1_table);
+ return ret;
+}
+
+int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) /*
+ * Deletes the snapshot matched by snapshot_id (ID first, then name): removes
+ * it from the in-memory list, writes the new snapshot table, then drops the
+ * references held through the snapshot's L1 table and frees that table.
+ *
+ * Returns 0 on success, -ENOENT if no snapshot matches, or the negative
+ * result of the failing step.
+ *
+ * NOTE(review): when qcow2_write_snapshots() fails, the entry has already
+ * been removed from s->snapshots and its id_str/name are not freed - confirm
+ * whether that is intended.
+ */
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowSnapshot sn;
+ int snapshot_index, ret;
+
+ /* Search the snapshot */
+ snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
+ if (snapshot_index < 0) {
+ return -ENOENT;
+ }
+ sn = s->snapshots[snapshot_index];
+
+ /* Remove it from the snapshot list (sn keeps a copy of the entry, including
+ * its string pointers, for the cleanup below) */
+ memmove(s->snapshots + snapshot_index,
+ s->snapshots + snapshot_index + 1,
+ (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
+ s->nb_snapshots--;
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * The snapshot is now unused, clean up. If we fail after this point, we
+ * won't recover but just leak clusters.
+ */
+ g_free(sn.id_str);
+ g_free(sn.name);
+
+ /*
+ * Now decrease the refcounts of clusters referenced by the snapshot and
+ * free the L1 table.
+ */
+ ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
+ sn.l1_size, -1);
+ if (ret < 0) {
+ return ret;
+ }
+ qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
+ QCOW2_DISCARD_SNAPSHOT);
+
+ /* must update the copied flag on the current cluster offsets */
+ ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+#ifdef DEBUG_ALLOC
+ {
+ BdrvCheckResult result = {0};
+ qcow2_check_refcounts(bs, &result, 0);
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Build an array describing every snapshot of the image.
+ *
+ * *psn_tab receives a g_malloc0()-allocated array with one QEMUSnapshotInfo
+ * per snapshot, or NULL when the image has no snapshots. The number of
+ * snapshots is returned.
+ */
+int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
+{
+    BDRVQcowState *s = bs->opaque;
+    QEMUSnapshotInfo *table = NULL;
+    int idx;
+
+    if (s->nb_snapshots) {
+        table = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
+
+        for (idx = 0; idx < s->nb_snapshots; idx++) {
+            QCowSnapshot *src = &s->snapshots[idx];
+            QEMUSnapshotInfo *dst = &table[idx];
+
+            pstrcpy(dst->id_str, sizeof(dst->id_str), src->id_str);
+            pstrcpy(dst->name, sizeof(dst->name), src->name);
+            dst->vm_state_size = src->vm_state_size;
+            dst->date_sec = src->date_sec;
+            dst->date_nsec = src->date_nsec;
+            dst->vm_clock_nsec = src->vm_clock_nsec;
+        }
+    }
+
+    *psn_tab = table;
+    return s->nb_snapshots;
+}
+
+/*
+ * Make the L1 table of the snapshot matched by snapshot_name (ID first, then
+ * name) the active L1 table of a read-only image. Only in-memory state
+ * (s->l1_table, s->l1_size, s->l1_table_offset) is changed by this function.
+ *
+ * Returns 0 on success, -ENOENT if no snapshot matches, or the negative
+ * bdrv_pread() result if the snapshot's L1 table cannot be read.
+ */
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
+{
+    int i, snapshot_index;
+    BDRVQcowState *s = bs->opaque;
+    QCowSnapshot *sn;
+    uint64_t *new_l1_table;
+    int new_l1_bytes;
+    int ret;
+
+    assert(bs->read_only);
+
+    /* Search the snapshot */
+    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
+    if (snapshot_index < 0) {
+        return -ENOENT;
+    }
+    sn = &s->snapshots[snapshot_index];
+
+    /* Allocate and read in the snapshot's L1 table. The buffer must be sized
+     * from the snapshot's own L1 size: sizing it from the active table would
+     * make the byte-swap loop below run past the allocation whenever the
+     * snapshot's table is the larger one. */
+    new_l1_bytes = sn->l1_size * sizeof(uint64_t);
+    new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
+
+    ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+    if (ret < 0) {
+        g_free(new_l1_table);
+        return ret;
+    }
+
+    /* Switch the L1 table */
+    g_free(s->l1_table);
+
+    s->l1_size = sn->l1_size;
+    s->l1_table_offset = sn->l1_table_offset;
+    s->l1_table = new_l1_table;
+
+    for (i = 0; i < s->l1_size; i++) {
+        be64_to_cpus(&s->l1_table[i]);
+    }
+
+    return 0;
+}
diff --git a/contrib/qemu/block/qcow2.c b/contrib/qemu/block/qcow2.c
new file mode 100644
index 000000000..0eceefe2c
--- /dev/null
+++ b/contrib/qemu/block/qcow2.c
@@ -0,0 +1,1825 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include <zlib.h>
+#include "qemu/aes.h"
+#include "block/qcow2.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qbool.h"
+#include "trace.h"
+
+/*
+ Differences with QCOW:
+
+ - Support for multiple incremental snapshots.
+ - Memory management by reference counts.
+ - Clusters which have a reference count of one have the bit
+ QCOW_OFLAG_COPIED to optimize write performance.
+ - Size of compressed clusters is stored in sectors to reduce bit usage
+ in the cluster offsets.
+ - Support for storing additional data (such as the VM state) in the
+ snapshots.
+ - If a backing store is used, the cluster size is not constrained
+ (could be backported to QCOW).
+ - L2 tables always have a size of one cluster.
+*/
+
+
+/*
+ * Fixed-size prefix of a header extension as read from the image file.
+ * qcow2_read_extensions() reads it with bdrv_pread() and converts both
+ * fields from big-endian; header_ext_add() writes them back big-endian.
+ */
+typedef struct {
+ /* Extension type; one of the QCOW2_EXT_MAGIC_* values or an unknown one */
+ uint32_t magic;
+ /* Payload length in bytes; the payload itself is padded to 8 bytes */
+ uint32_t len;
+} QCowExtension;
+
+/* Extension types handled explicitly in qcow2_read_extensions() */
+#define QCOW2_EXT_MAGIC_END 0
+#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
+#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
+
+/*
+ * Format probe. Returns 100 when buf holds at least a complete QCowHeader
+ * whose big-endian magic equals QCOW_MAGIC and whose version is 2 or newer;
+ * returns 0 otherwise. The filename is not consulted.
+ */
+static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const QCowHeader *hdr = (const void *)buf;
+
+    if (buf_size < sizeof(QCowHeader)) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->magic) != QCOW_MAGIC) {
+        return 0;
+    }
+    if (be32_to_cpu(hdr->version) < 2) {
+        return 0;
+    }
+
+    return 100;
+}
+
+
+/*
+ * Read the qcow2 header extensions located in [start_offset, end_offset).
+ *
+ * Parsing stops at an extension with magic QCOW2_EXT_MAGIC_END or once
+ * end_offset is reached. Handled extensions:
+ *  - backing format: copied NUL-terminated into bs->backing_format;
+ *  - feature table: when p_feature_table is non-NULL, read into a newly
+ *    allocated buffer with room for two extra zeroed Qcow2Feature entries
+ *    and returned through *p_feature_table (the caller owns and frees it);
+ *  - anything else: kept on s->unknown_header_ext so that it can be written
+ *    back when the header is rewritten.
+ *
+ * Returns 0 upon success, non-0 otherwise (a negative errno or one of the
+ * small positive codes 1, 2, 3 used below).
+ */
+static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
+                                 uint64_t end_offset, void **p_feature_table)
+{
+    BDRVQcowState *s = bs->opaque;
+    QCowExtension ext;
+    uint64_t offset;
+    int ret;
+
+#ifdef DEBUG_EXT
+    printf("qcow2_read_extensions: start=%" PRIu64 " end=%" PRIu64 "\n",
+           start_offset, end_offset);
+#endif
+    offset = start_offset;
+    while (offset < end_offset) {
+
+#ifdef DEBUG_EXT
+        /* Sanity check */
+        if (offset > s->cluster_size)
+            printf("qcow2_read_extension: suspicious offset %" PRIu64 "\n",
+                   offset);
+
+        printf("attempting to read extended header in offset %" PRIu64 "\n",
+               offset);
+#endif
+
+        if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
+            fprintf(stderr, "qcow2_read_extension: ERROR: "
+                    "pread fail from offset %" PRIu64 "\n",
+                    offset);
+            return 1;
+        }
+        be32_to_cpus(&ext.magic);
+        be32_to_cpus(&ext.len);
+        offset += sizeof(ext);
+#ifdef DEBUG_EXT
+        printf("ext.magic = 0x%x\n", ext.magic);
+#endif
+        /* The payload must fit in what is left of the extension area */
+        if (ext.len > end_offset - offset) {
+            error_report("Header extension too large");
+            return -EINVAL;
+        }
+
+        switch (ext.magic) {
+        case QCOW2_EXT_MAGIC_END:
+            return 0;
+
+        case QCOW2_EXT_MAGIC_BACKING_FORMAT:
+            /* '>=' leaves room for the terminating NUL added below */
+            if (ext.len >= sizeof(bs->backing_format)) {
+                fprintf(stderr, "ERROR: ext_backing_format: len=%u too large"
+                        " (>=%zu)\n",
+                        ext.len, sizeof(bs->backing_format));
+                return 2;
+            }
+            if (bdrv_pread(bs->file, offset , bs->backing_format,
+                           ext.len) != ext.len)
+                return 3;
+            bs->backing_format[ext.len] = '\0';
+#ifdef DEBUG_EXT
+            printf("Qcow2: Got format extension %s\n", bs->backing_format);
+#endif
+            break;
+
+        case QCOW2_EXT_MAGIC_FEATURE_TABLE:
+            if (p_feature_table != NULL) {
+                void *feature_table =
+                    g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
+
+                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+                if (ret < 0) {
+                    /* Nothing is handed to the caller, so free it here */
+                    g_free(feature_table);
+                    return ret;
+                }
+
+                *p_feature_table = feature_table;
+            }
+            break;
+
+        default:
+            /* unknown magic - save it in case we need to rewrite the header */
+            {
+                Qcow2UnknownHeaderExtension *uext;
+
+                uext = g_malloc0(sizeof(*uext) + ext.len);
+                uext->magic = ext.magic;
+                uext->len = ext.len;
+                /*
+                 * Linked before the read so that a failure leaves it on the
+                 * list, where cleanup_unknown_header_ext() will free it.
+                 */
+                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
+
+                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+                if (ret < 0) {
+                    return ret;
+                }
+            }
+            break;
+        }
+
+        /* Payloads are padded to a multiple of 8 bytes */
+        offset += ((ext.len + 7) & ~7);
+    }
+
+    return 0;
+}
+
+/* Unlink and free every entry of the image's unknown-header-extension list. */
+static void cleanup_unknown_header_ext(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    Qcow2UnknownHeaderExtension *cur, *following;
+
+    /* The _SAFE variant is required because cur is freed inside the loop */
+    QLIST_FOREACH_SAFE(cur, &s->unknown_header_ext, next, following) {
+        QLIST_REMOVE(cur, next);
+        g_free(cur);
+    }
+}
+
+/*
+ * Format a printf-style description of an unsupported feature (truncated to
+ * fit a 64-byte buffer) and report it as QERR_UNKNOWN_BLOCK_FORMAT_FEATURE
+ * for this device and the "qcow2" format.
+ */
+static void GCC_FMT_ATTR(2, 3) report_unsupported(BlockDriverState *bs,
+    const char *fmt, ...)
+{
+    char text[64];
+    va_list args;
+
+    va_start(args, fmt);
+    vsnprintf(text, sizeof(text), fmt, args);
+    va_end(args);
+
+    qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+                  bs->device_name, "qcow2", text);
+}
+
+/*
+ * Report every incompatible feature bit set in mask.
+ *
+ * table may be NULL; otherwise it is walked until an entry with an empty
+ * name. Bits that have a name in the table are reported by that name; any
+ * bits left over are reported together as a hexadecimal mask.
+ */
+static void report_unsupported_feature(BlockDriverState *bs,
+    Qcow2Feature *table, uint64_t mask)
+{
+    while (table && table->name[0] != '\0') {
+        /*
+         * The bit number comes from the image file. Shift a 64-bit one
+         * (mask is 64 bits wide, an int shift is wrong from bit 31 on) and
+         * ignore bit numbers that do not fit in the mask at all.
+         */
+        if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE && table->bit < 64) {
+            uint64_t bit = 1ULL << table->bit;
+
+            if (mask & bit) {
+                report_unsupported(bs, "%.46s",table->name);
+                mask &= ~bit;
+            }
+        }
+        table++;
+    }
+
+    if (mask) {
+        report_unsupported(bs, "Unknown incompatible feature: %" PRIx64, mask);
+    }
+}
+
+/*
+ * Set the dirty bit in the on-disk header and flush the image file.
+ *
+ * The in-memory incompatible_features field only gains the dirty bit once
+ * both the header write and the flush have succeeded, so callers are not
+ * required to check the return value.
+ *
+ * Returns 0 on success (or if the image is already dirty), or the negative
+ * error from bdrv_pwrite()/bdrv_flush().
+ */
+int qcow2_mark_dirty(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t be_features;
+    int ret;
+
+    assert(s->qcow_version >= 3);
+
+    if (!(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
+        be_features = cpu_to_be64(s->incompatible_features |
+                                  QCOW2_INCOMPAT_DIRTY);
+
+        ret = bdrv_pwrite(bs->file,
+                          offsetof(QCowHeader, incompatible_features),
+                          &be_features, sizeof(be_features));
+        if (ret >= 0) {
+            ret = bdrv_flush(bs->file);
+        }
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* Only treat image as dirty if the header was updated successfully */
+        s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
+    }
+
+    return 0;
+}
+
+/*
+ * Clear the dirty bit: flush the image first, then drop the bit from the
+ * in-memory feature mask and rewrite the header. Does nothing when the image
+ * is not marked dirty.
+ *
+ * Only call this when there are no pending requests; it does not guard
+ * against concurrent requests dirtying the image.
+ *
+ * Returns 0 or a negative error from bdrv_flush()/qcow2_update_header().
+ */
+static int qcow2_mark_clean(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int ret;
+
+    if (!(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
+        return 0;
+    }
+
+    ret = bdrv_flush(bs);
+    if (ret < 0) {
+        return ret;
+    }
+
+    s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
+    return qcow2_update_header(bs);
+}
+
+/*
+ * Run the refcount consistency check and, when a fixing mode was requested
+ * and the result shows neither check errors nor corruptions, mark the image
+ * clean.
+ *
+ * Returns the result of qcow2_check_refcounts(), or that of
+ * qcow2_mark_clean() when the latter was attempted.
+ */
+static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
+                       BdrvCheckMode fix)
+{
+    int ret = qcow2_check_refcounts(bs, result, fix);
+
+    if (ret >= 0 && fix &&
+        result->check_errors == 0 && result->corruptions == 0) {
+        ret = qcow2_mark_clean(bs);
+    }
+
+    return ret;
+}
+
+/*
+ * Runtime options accepted by qcow2_open(). All of them are booleans, and
+ * each name is spelled through its QCOW2_OPT_* macro so that the list cannot
+ * drift from the qemu_opt_get_bool()/qdict_put() call sites, which use the
+ * same macros.
+ */
+static QemuOptsList qcow2_runtime_opts = {
+    .name = "qcow2",
+    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
+    .desc = {
+        {
+            .name = QCOW2_OPT_LAZY_REFCOUNTS,
+            .type = QEMU_OPT_BOOL,
+            .help = "Postpone refcount updates",
+        },
+        {
+            .name = QCOW2_OPT_DISCARD_REQUEST,
+            .type = QEMU_OPT_BOOL,
+            .help = "Pass guest discard requests to the layer below",
+        },
+        {
+            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
+            .type = QEMU_OPT_BOOL,
+            .help = "Generate discard requests when snapshot related space "
+                    "is freed",
+        },
+        {
+            .name = QCOW2_OPT_DISCARD_OTHER,
+            .type = QEMU_OPT_BOOL,
+            .help = "Generate discard requests when other clusters are freed",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Open a qcow2 image on top of bs->file.
+ *
+ * Reads and validates the header, loads the L1 table, sets up the L2 and
+ * refcount block caches, reads header extensions, the backing file name and
+ * the snapshot table, clears autoclear feature bits on writable images,
+ * repairs a dirty image (unless opened for checking or read-only) and finally
+ * applies the runtime options passed in @options.
+ *
+ * Returns a non-negative value on success and a negative errno on failure;
+ * on failure everything allocated so far is released again.
+ */
+static int qcow2_open(BlockDriverState *bs, QDict *options, int flags)
+{
+    BDRVQcowState *s = bs->opaque;
+    int len, i, ret = 0;
+    QCowHeader header;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+    uint64_t ext_end;
+    uint64_t l1_vm_state_index;
+
+    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+    if (ret < 0) {
+        goto fail;
+    }
+    /* All header fields are stored big-endian */
+    be32_to_cpus(&header.magic);
+    be32_to_cpus(&header.version);
+    be64_to_cpus(&header.backing_file_offset);
+    be32_to_cpus(&header.backing_file_size);
+    be64_to_cpus(&header.size);
+    be32_to_cpus(&header.cluster_bits);
+    be32_to_cpus(&header.crypt_method);
+    be64_to_cpus(&header.l1_table_offset);
+    be32_to_cpus(&header.l1_size);
+    be64_to_cpus(&header.refcount_table_offset);
+    be32_to_cpus(&header.refcount_table_clusters);
+    be64_to_cpus(&header.snapshots_offset);
+    be32_to_cpus(&header.nb_snapshots);
+
+    if (header.magic != QCOW_MAGIC) {
+        ret = -EMEDIUMTYPE;
+        goto fail;
+    }
+    if (header.version < 2 || header.version > 3) {
+        report_unsupported(bs, "QCOW version %d", header.version);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    s->qcow_version = header.version;
+
+    /* Initialise version 3 header fields */
+    if (header.version == 2) {
+        header.incompatible_features = 0;
+        header.compatible_features = 0;
+        header.autoclear_features = 0;
+        header.refcount_order = 4;
+        header.header_length = 72;
+    } else {
+        be64_to_cpus(&header.incompatible_features);
+        be64_to_cpus(&header.compatible_features);
+        be64_to_cpus(&header.autoclear_features);
+        be32_to_cpus(&header.refcount_order);
+        be32_to_cpus(&header.header_length);
+    }
+
+    /* Keep header fields we do not know so that they survive a rewrite */
+    if (header.header_length > sizeof(header)) {
+        s->unknown_header_fields_size = header.header_length - sizeof(header);
+        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
+        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+                         s->unknown_header_fields_size);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    /* Extensions end at the backing file name or at the end of cluster 0 */
+    if (header.backing_file_offset) {
+        ext_end = header.backing_file_offset;
+    } else {
+        ext_end = 1 << header.cluster_bits;
+    }
+
+    /* Handle feature bits */
+    s->incompatible_features = header.incompatible_features;
+    s->compatible_features = header.compatible_features;
+    s->autoclear_features = header.autoclear_features;
+
+    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
+        void *feature_table = NULL;
+        qcow2_read_extensions(bs, header.header_length, ext_end,
+                              &feature_table);
+        report_unsupported_feature(bs, feature_table,
+                                   s->incompatible_features &
+                                   ~QCOW2_INCOMPAT_MASK);
+        /* The table was only needed for the report; don't leak it */
+        g_free(feature_table);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    /* Check support for various header values */
+    if (header.refcount_order != 4) {
+        report_unsupported(bs, "%d bit reference counts",
+                           1 << header.refcount_order);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    if (header.cluster_bits < MIN_CLUSTER_BITS ||
+        header.cluster_bits > MAX_CLUSTER_BITS) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    if (header.crypt_method > QCOW_CRYPT_AES) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    s->crypt_method_header = header.crypt_method;
+    if (s->crypt_method_header) {
+        bs->encrypted = 1;
+    }
+    s->cluster_bits = header.cluster_bits;
+    s->cluster_size = 1 << s->cluster_bits;
+    s->cluster_sectors = 1 << (s->cluster_bits - 9);
+    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
+    s->l2_size = 1 << s->l2_bits;
+    bs->total_sectors = header.size / 512;
+    s->csize_shift = (62 - (s->cluster_bits - 8));
+    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
+    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
+    s->refcount_table_offset = header.refcount_table_offset;
+    s->refcount_table_size =
+        header.refcount_table_clusters << (s->cluster_bits - 3);
+
+    s->snapshots_offset = header.snapshots_offset;
+    s->nb_snapshots = header.nb_snapshots;
+
+    /* read the level 1 table */
+    s->l1_size = header.l1_size;
+
+    l1_vm_state_index = size_to_l1(s, header.size);
+    if (l1_vm_state_index > INT_MAX) {
+        ret = -EFBIG;
+        goto fail;
+    }
+    s->l1_vm_state_index = l1_vm_state_index;
+
+    /* the L1 table must contain at least enough entries to put
+       header.size bytes */
+    if (s->l1_size < s->l1_vm_state_index) {
+        ret = -EINVAL;
+        goto fail;
+    }
+    s->l1_table_offset = header.l1_table_offset;
+    if (s->l1_size > 0) {
+        s->l1_table = g_malloc0(
+            align_offset(s->l1_size * sizeof(uint64_t), 512));
+        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+                         s->l1_size * sizeof(uint64_t));
+        if (ret < 0) {
+            goto fail;
+        }
+        for (i = 0; i < s->l1_size; i++) {
+            be64_to_cpus(&s->l1_table[i]);
+        }
+    }
+
+    /* alloc L2 table/refcount block cache */
+    s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
+    s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
+
+    s->cluster_cache = g_malloc(s->cluster_size);
+    /* one more sector for decompressed data alignment */
+    s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+                                  + 512);
+    s->cluster_cache_offset = -1;
+    s->flags = flags;
+
+    ret = qcow2_refcount_init(bs);
+    if (ret != 0) {
+        goto fail;
+    }
+
+    QLIST_INIT(&s->cluster_allocs);
+    QTAILQ_INIT(&s->discards);
+
+    /* read qcow2 extensions */
+    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL)) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    /* read the backing file name */
+    if (header.backing_file_offset != 0) {
+        len = header.backing_file_size;
+        if (len > 1023) {
+            len = 1023;
+        }
+        ret = bdrv_pread(bs->file, header.backing_file_offset,
+                         bs->backing_file, len);
+        if (ret < 0) {
+            goto fail;
+        }
+        bs->backing_file[len] = '\0';
+    }
+
+    ret = qcow2_read_snapshots(bs);
+    if (ret < 0) {
+        goto fail;
+    }
+
+    /* Clear unknown autoclear feature bits */
+    if (!bs->read_only && s->autoclear_features != 0) {
+        s->autoclear_features = 0;
+        ret = qcow2_update_header(bs);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    /* Initialise locks */
+    qemu_co_mutex_init(&s->lock);
+
+    /* Repair image if dirty */
+    if (!(flags & BDRV_O_CHECK) && !bs->read_only &&
+        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
+        BdrvCheckResult result = {0};
+
+        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    /* Enable lazy_refcounts according to image and command line options */
+    opts = qemu_opts_create_nofail(&qcow2_runtime_opts);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        /* opts is not referenced after this point; release it here */
+        qemu_opts_del(opts);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
+        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
+
+    s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
+    s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
+    s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
+        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
+                          flags & BDRV_O_UNMAP);
+    s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
+        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
+    s->discard_passthrough[QCOW2_DISCARD_OTHER] =
+        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
+
+    qemu_opts_del(opts);
+
+    if (s->use_lazy_refcounts && s->qcow_version < 3) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR, "Lazy refcounts require "
+            "a qcow2 image with at least qemu 1.1 compatibility level");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+#ifdef DEBUG_ALLOC
+    {
+        BdrvCheckResult result = {0};
+        qcow2_check_refcounts(bs, &result, 0);
+    }
+#endif
+    return ret;
+
+ fail:
+    g_free(s->unknown_header_fields);
+    cleanup_unknown_header_ext(bs);
+    qcow2_free_snapshots(bs);
+    qcow2_refcount_close(bs);
+    g_free(s->l1_table);
+    if (s->l2_table_cache) {
+        qcow2_cache_destroy(bs, s->l2_table_cache);
+    }
+    /* Created together with the L2 cache, so it must be torn down as well */
+    if (s->refcount_block_cache) {
+        qcow2_cache_destroy(bs, s->refcount_block_cache);
+    }
+    g_free(s->cluster_cache);
+    qemu_vfree(s->cluster_data);
+    return ret;
+}
+
+/*
+ * Derive the AES-128 encryption and decryption keys from a passphrase and
+ * enable the encryption method announced in the image header.
+ *
+ * At most the first 16 bytes of key are used; shorter keys are padded with
+ * zero bytes.
+ *
+ * Returns 0 on success, -1 if either AES key schedule could not be set up.
+ */
+static int qcow2_set_key(BlockDriverState *bs, const char *key)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint8_t keybuf[16];
+    int len, i;
+
+    memset(keybuf, 0, sizeof(keybuf));
+    len = strlen(key);
+    if (len > 16) {
+        len = 16;
+    }
+    /* XXX: we could compress the chars to 7 bits to increase
+       entropy */
+    for (i = 0; i < len; i++) {
+        keybuf[i] = key[i];
+    }
+    s->crypt_method = s->crypt_method_header;
+
+    if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0 ||
+        AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0) {
+        return -1;
+    }
+#if 0
+    /* test */
+    {
+        uint8_t in[16];
+        uint8_t out[16];
+        uint8_t tmp[16];
+        for(i=0;i<16;i++)
+            in[i] = i;
+        AES_encrypt(in, tmp, &s->aes_encrypt_key);
+        AES_decrypt(tmp, out, &s->aes_decrypt_key);
+        for(i = 0; i < 16; i++)
+            printf(" %02x", tmp[i]);
+        printf("\n");
+        for(i = 0; i < 16; i++)
+            printf(" %02x", out[i]);
+        printf("\n");
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Reopen "prepare" hook for qcow2.
+ *
+ * qcow2 has nothing to do on reopen, so this stub ignores all of its
+ * arguments (including errp, which is never set) and reports success.
+ *
+ * Returns 0 always.
+ */
+static int qcow2_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+/*
+ * Report whether the data starting at sector_num is allocated in this image.
+ *
+ * *pnum starts out as nb_sectors and is passed to qcow2_get_cluster_offset()
+ * by pointer, so the lookup may change it (presumably to the length of the
+ * run sharing the same state); it is set to 0 if the lookup fails.
+ *
+ * Returns non-zero when the lookup yields a non-zero cluster offset or a
+ * zero cluster, 0 otherwise (including on lookup failure).
+ */
+static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *pnum)
+{
+    BDRVQcowState *s = bs->opaque;
+    /*
+     * Initialised so that the return expression below never reads an
+     * indeterminate value if the lookup fails before storing an offset.
+     */
+    uint64_t cluster_offset = 0;
+    int ret;
+
+    *pnum = nb_sectors;
+    /* FIXME We can get errors here, but the bdrv_co_is_allocated interface
+     * can't pass them on today */
+    qemu_co_mutex_lock(&s->lock);
+    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+    qemu_co_mutex_unlock(&s->lock);
+    if (ret < 0) {
+        *pnum = 0;
+    }
+
+    return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO);
+}
+
+/*
+ * Handle reads that extend past the end of a backing file.
+ *
+ * If the whole request lies within bs, nothing is touched and nb_sectors is
+ * returned. Otherwise the part of qiov beyond the end of bs is filled with
+ * zeros and the number of sectors that can still be read from bs (possibly
+ * 0) is returned.
+ */
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+                        int64_t sector_num, int nb_sectors)
+{
+    int readable;
+
+    if ((sector_num + nb_sectors) <= bs->total_sectors) {
+        return nb_sectors;
+    }
+
+    readable = (sector_num >= bs->total_sectors)
+             ? 0
+             : bs->total_sectors - sector_num;
+
+    /* Zero everything after the readable prefix */
+    qemu_iovec_memset(qiov, 512 * readable, 0, 512 * (nb_sectors - readable));
+
+    return readable;
+}
+
+/*
+ * Coroutine read of remaining_sectors sectors starting at sector_num into
+ * qiov.
+ *
+ * The request is processed in pieces: each iteration asks
+ * qcow2_get_cluster_offset() how many sectors (cur_nr_sectors) can be handled
+ * at once and what kind of cluster they live in, then serves that piece
+ * according to the cluster type. s->lock is held throughout, except while a
+ * bdrv_co_readv() call on the backing file or the image file is in progress.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster, n1;
+ int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cluster_offset = 0;
+ uint64_t bytes_done = 0;
+ QEMUIOVector hd_qiov;
+ uint8_t *cluster_data = NULL;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (remaining_sectors != 0) {
+
+ /* prepare next request */
+ cur_nr_sectors = remaining_sectors;
+ /* encrypted reads go through a bounce buffer of limited size */
+ if (s->crypt_method) {
+ cur_nr_sectors = MIN(cur_nr_sectors,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ }
+
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9,
+ &cur_nr_sectors, &cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+
+ /* hd_qiov becomes a window onto the part of qiov served this round */
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+
+ switch (ret) {
+ case QCOW2_CLUSTER_UNALLOCATED:
+
+ if (bs->backing_hd) {
+ /* read from the base image */
+ n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+ sector_num, cur_nr_sectors);
+ if (n1 > 0) {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ n1, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+ } else {
+ /* Note: in this case, no need to wait */
+ qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ }
+ break;
+
+ case QCOW2_CLUSTER_ZERO:
+ qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ break;
+
+ case QCOW2_CLUSTER_COMPRESSED:
+ /* add AIO support for compressed blocks ? */
+ ret = qcow2_decompress_cluster(bs, cluster_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* copy the requested part out of s->cluster_cache */
+ qemu_iovec_from_buf(&hd_qiov, 0,
+ s->cluster_cache + index_in_cluster * 512,
+ 512 * cur_nr_sectors);
+ break;
+
+ case QCOW2_CLUSTER_NORMAL:
+ /* data clusters must start on a 512-byte boundary */
+ if ((cluster_offset & 511) != 0) {
+ ret = -EIO;
+ goto fail;
+ }
+
+ if (s->crypt_method) {
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ */
+ if (!cluster_data) {
+ cluster_data =
+ qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ }
+
+ assert(cur_nr_sectors <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ 512 * cur_nr_sectors);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ qemu_co_mutex_unlock(&s->lock);
+ ret = bdrv_co_readv(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (s->crypt_method) {
+ /* process the bounce buffer in place with the decrypt key, then
+ * copy the result into the caller's qiov */
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
+ qemu_iovec_from_buf(qiov, bytes_done,
+ cluster_data, 512 * cur_nr_sectors);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ ret = -EIO;
+ goto fail;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+ ret = 0;
+
+/* reached on success as well; the lock is held on every path to this label */
+fail:
+ qemu_co_mutex_unlock(&s->lock);
+
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cluster_data);
+
+ return ret;
+}
+
+/*
+ * Coroutine write of remaining_sectors sectors from qiov starting at
+ * sector_num.
+ *
+ * Each iteration obtains a host cluster offset for the next piece with
+ * qcow2_alloc_cluster_offset(), writes the data (through an encrypted bounce
+ * buffer when encryption is active) and then links the new clusters into the
+ * L2 tables for every QCowL2Meta that was returned. s->lock is held
+ * throughout, except while bdrv_co_writev() on the image file is in progress.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ int remaining_sectors,
+ QEMUIOVector *qiov)
+{
+ BDRVQcowState *s = bs->opaque;
+ int index_in_cluster;
+ int n_end;
+ int ret;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t cluster_offset;
+ QEMUIOVector hd_qiov;
+ uint64_t bytes_done = 0;
+ uint8_t *cluster_data = NULL;
+ QCowL2Meta *l2meta = NULL;
+
+ trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
+ remaining_sectors);
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ s->cluster_cache_offset = -1; /* disable compressed cache */
+
+ qemu_co_mutex_lock(&s->lock);
+
+ while (remaining_sectors != 0) {
+
+ l2meta = NULL;
+
+ trace_qcow2_writev_start_part(qemu_coroutine_self());
+ index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ n_end = index_in_cluster + remaining_sectors;
+ /* encrypted writes go through a bounce buffer of limited size */
+ if (s->crypt_method &&
+ n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors) {
+ n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
+ }
+
+ ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
+ index_in_cluster, n_end, &cur_nr_sectors, &cluster_offset, &l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ assert((cluster_offset & 511) == 0);
+
+ /* hd_qiov becomes a window onto the part of qiov written this round */
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
+ cur_nr_sectors * 512);
+
+ if (s->crypt_method) {
+ if (!cluster_data) {
+ cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
+ s->cluster_size);
+ }
+
+ assert(hd_qiov.size <=
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
+
+ qcow2_encrypt_sectors(s, sector_num, cluster_data,
+ cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
+
+ /* from here on the bounce buffer is what gets written */
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cluster_data,
+ cur_nr_sectors * 512);
+ }
+
+ qemu_co_mutex_unlock(&s->lock);
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ trace_qcow2_writev_data(qemu_coroutine_self(),
+ (cluster_offset >> 9) + index_in_cluster);
+ ret = bdrv_co_writev(bs->file,
+ (cluster_offset >> 9) + index_in_cluster,
+ cur_nr_sectors, &hd_qiov);
+ qemu_co_mutex_lock(&s->lock);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* data is written; now link the clusters and retire each l2meta */
+ while (l2meta != NULL) {
+ QCowL2Meta *next;
+
+ ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Take the request off the list of running requests */
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+ next = l2meta->next;
+ g_free(l2meta);
+ l2meta = next;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+ }
+ ret = 0;
+
+/* reached on success as well; the lock is held on every path to this label */
+fail:
+ qemu_co_mutex_unlock(&s->lock);
+
+ /* on error, retire whatever l2meta entries are left without linking them */
+ while (l2meta != NULL) {
+ QCowL2Meta *next;
+
+ if (l2meta->nb_clusters != 0) {
+ QLIST_REMOVE(l2meta, next_in_flight);
+ }
+ qemu_co_queue_restart_all(&l2meta->dependent_requests);
+
+ next = l2meta->next;
+ g_free(l2meta);
+ l2meta = next;
+ }
+
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cluster_data);
+ trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
+
+ return ret;
+}
+
+/*
+ * Release everything qcow2_open() set up for this image: flush both metadata
+ * caches, try to clear the dirty bit, then destroy the caches and free the
+ * L1 table, preserved header data, cluster buffers, refcount state and
+ * snapshot list.
+ *
+ * Errors from the cache flushes and from qcow2_mark_clean() are ignored;
+ * the function returns nothing.
+ */
+static void qcow2_close(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ g_free(s->l1_table);
+
+ /* flush the caches before qcow2_mark_clean() is called below */
+ qcow2_cache_flush(bs, s->l2_table_cache);
+ qcow2_cache_flush(bs, s->refcount_block_cache);
+
+ qcow2_mark_clean(bs);
+
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ qcow2_cache_destroy(bs, s->refcount_block_cache);
+
+ g_free(s->unknown_header_fields);
+ cleanup_unknown_header_ext(bs);
+
+ g_free(s->cluster_cache);
+ qemu_vfree(s->cluster_data);
+ qcow2_refcount_close(bs);
+ qcow2_free_snapshots(bs);
+}
+
+/*
+ * Drop all cached image state by closing and reopening the image in place.
+ *
+ * The open flags, the lazy-refcounts setting and (for encrypted images) the
+ * AES keys are carried over from the old state to the new one. The result of
+ * qcow2_open() is not checked.
+ */
+static void qcow2_invalidate_cache(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int saved_flags = s->flags;
+    AES_KEY saved_enc_key;
+    AES_KEY saved_dec_key;
+    uint32_t saved_crypt_method = 0;
+    QDict *reopen_opts;
+
+    /*
+     * Backing files are read-only which makes all of their metadata immutable,
+     * that means we don't have to worry about reopening them here.
+     */
+
+    if (s->crypt_method) {
+        saved_crypt_method = s->crypt_method;
+        saved_enc_key = s->aes_encrypt_key;
+        saved_dec_key = s->aes_decrypt_key;
+    }
+
+    qcow2_close(bs);
+
+    /* Must be read before the state is wiped below */
+    reopen_opts = qdict_new();
+    qdict_put(reopen_opts, QCOW2_OPT_LAZY_REFCOUNTS,
+              qbool_from_int(s->use_lazy_refcounts));
+
+    memset(s, 0, sizeof(BDRVQcowState));
+    qcow2_open(bs, reopen_opts, saved_flags);
+
+    QDECREF(reopen_opts);
+
+    if (saved_crypt_method) {
+        s->crypt_method = saved_crypt_method;
+        s->aes_encrypt_key = saved_enc_key;
+        s->aes_decrypt_key = saved_dec_key;
+    }
+}
+
+/*
+ * Serialise one header extension into buf.
+ *
+ * Writes a QCowExtension prefix (magic and len, both big-endian) followed by
+ * len bytes copied from s. The space consumed is rounded up so that the
+ * payload occupies a multiple of 8 bytes; the padding bytes themselves are
+ * not written here. s may be NULL when len is 0.
+ *
+ * Returns the number of bytes consumed, or -ENOSPC (converted to size_t;
+ * callers store the result in an int and test for < 0) when buflen is too
+ * small.
+ */
+static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
+    size_t len, size_t buflen)
+{
+    QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
+    size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
+
+    if (buflen < ext_len) {
+        return -ENOSPC;
+    }
+
+    *ext_backing_fmt = (QCowExtension) {
+        .magic = cpu_to_be32(magic),
+        .len = cpu_to_be32(len),
+    };
+    /*
+     * The end-of-extensions marker is added with s == NULL and len == 0;
+     * memcpy() must not be given a null pointer even for a zero length.
+     */
+    if (len > 0) {
+        memcpy(buf + sizeof(QCowExtension), s, len);
+    }
+
+    return ext_len;
+}
+
+/*
+ * Updates the qcow2 header, including the variable length parts of it, i.e.
+ * the backing file name and all extensions. qcow2 was not designed to allow
+ * such changes, so if we run out of space (we can only use the first cluster)
+ * this function may fail.
+ *
+ * The new header is assembled in a cluster-sized buffer in this order: fixed
+ * header, preserved unknown header fields, backing format extension, feature
+ * table extension, preserved unknown extensions, end marker, backing file
+ * name. The whole buffer is then written at offset 0 of bs->file.
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int qcow2_update_header(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ QCowHeader *header;
+ char *buf;
+ size_t buflen = s->cluster_size;
+ int ret;
+ uint64_t total_size;
+ uint32_t refcount_table_clusters;
+ size_t header_length;
+ Qcow2UnknownHeaderExtension *uext;
+
+ buf = qemu_blockalign(bs, buflen);
+
+ /* Header structure */
+ /* header keeps pointing at the start of the buffer while buf advances */
+ header = (QCowHeader*) buf;
+
+ if (buflen < sizeof(*header)) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ header_length = sizeof(*header) + s->unknown_header_fields_size;
+ total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
+
+ /* backing file fields start out as 0 and are filled in at the end */
+ *header = (QCowHeader) {
+ /* Version 2 fields */
+ .magic = cpu_to_be32(QCOW_MAGIC),
+ .version = cpu_to_be32(s->qcow_version),
+ .backing_file_offset = 0,
+ .backing_file_size = 0,
+ .cluster_bits = cpu_to_be32(s->cluster_bits),
+ .size = cpu_to_be64(total_size),
+ .crypt_method = cpu_to_be32(s->crypt_method_header),
+ .l1_size = cpu_to_be32(s->l1_size),
+ .l1_table_offset = cpu_to_be64(s->l1_table_offset),
+ .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
+ .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
+ .nb_snapshots = cpu_to_be32(s->nb_snapshots),
+ .snapshots_offset = cpu_to_be64(s->snapshots_offset),
+
+ /* Version 3 fields */
+ .incompatible_features = cpu_to_be64(s->incompatible_features),
+ .compatible_features = cpu_to_be64(s->compatible_features),
+ .autoclear_features = cpu_to_be64(s->autoclear_features),
+ .refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT),
+ .header_length = cpu_to_be32(header_length),
+ };
+
+ /* For older versions, write a shorter header */
+ /* ret temporarily holds the number of header bytes to keep */
+ switch (s->qcow_version) {
+ case 2:
+ ret = offsetof(QCowHeader, incompatible_features);
+ break;
+ case 3:
+ ret = sizeof(*header);
+ break;
+ default:
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* skip the fixed header and zero the rest of the buffer; for version 2
+ * this also wipes the version 3 fields set above */
+ buf += ret;
+ buflen -= ret;
+ memset(buf, 0, buflen);
+
+ /* Preserve any unknown field in the header */
+ if (s->unknown_header_fields_size) {
+ if (buflen < s->unknown_header_fields_size) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
+ buf += s->unknown_header_fields_size;
+ buflen -= s->unknown_header_fields_size;
+ }
+
+ /* Backing file format header extension */
+ if (*bs->backing_format) {
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
+ bs->backing_format, strlen(bs->backing_format),
+ buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ }
+
+ /* Feature table */
+ Qcow2Feature features[] = {
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
+ .name = "dirty bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+ .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+ .name = "lazy refcounts",
+ },
+ };
+
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
+ features, sizeof(features), buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
+
+ /* Keep unknown header extensions */
+ QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
+ ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+ }
+
+ /* End of header extensions */
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ buf += ret;
+ buflen -= ret;
+
+ /* Backing file name */
+ if (*bs->backing_file) {
+ size_t backing_file_len = strlen(bs->backing_file);
+
+ if (buflen < backing_file_len) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ /* Using strncpy is ok here, since buf is not NUL-terminated. */
+ strncpy(buf, bs->backing_file, buflen);
+
+ /* the name's offset is its distance from the start of the buffer */
+ header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
+ header->backing_file_size = cpu_to_be32(backing_file_len);
+ }
+
+ /* Write the new header */
+ ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = 0;
+/* reached on success as well; free through header because buf has moved */
+fail:
+ qemu_vfree(header);
+ return ret;
+}
+
+/*
+ * Record a new backing file name and format in bs (a NULL argument is
+ * stored as an empty string) and rewrite the image header accordingly.
+ *
+ * Returns the result of qcow2_update_header().
+ */
+static int qcow2_change_backing_file(BlockDriverState *bs,
+    const char *backing_file, const char *backing_fmt)
+{
+    const char *new_file = backing_file ? backing_file : "";
+    const char *new_fmt = backing_fmt ? backing_fmt : "";
+
+    pstrcpy(bs->backing_file, sizeof(bs->backing_file), new_file);
+    pstrcpy(bs->backing_format, sizeof(bs->backing_format), new_fmt);
+
+    return qcow2_update_header(bs);
+}
+
+/*
+ * Preallocate metadata for the whole image: walk the virtual disk from
+ * offset 0 to its length, allocating clusters and linking them into the L2
+ * tables, then extend the image file so that it covers the last allocated
+ * sector.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int preallocate(BlockDriverState *bs)
+{
+    int64_t length;
+    uint64_t nb_sectors;
+    uint64_t offset;
+    uint64_t host_offset = 0;
+    int num = 0;
+    int ret;
+    QCowL2Meta *meta;
+
+    /* A negative length is an error code, not a size to iterate over */
+    length = bdrv_getlength(bs);
+    if (length < 0) {
+        return (int)length;
+    }
+    nb_sectors = length >> 9;
+    offset = 0;
+
+    while (nb_sectors) {
+        num = MIN(nb_sectors, INT_MAX >> 9);
+        meta = NULL;
+        ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num,
+                                         &host_offset, &meta);
+        if (ret < 0) {
+            return ret;
+        }
+
+        /*
+         * Only touch meta when the allocator actually returned one; it was
+         * previously passed on and dereferenced before being checked.
+         */
+        if (meta != NULL) {
+            ret = qcow2_alloc_cluster_link_l2(bs, meta);
+            if (ret < 0) {
+                qcow2_free_any_clusters(bs, meta->alloc_offset,
+                                        meta->nb_clusters,
+                                        QCOW2_DISCARD_NEVER);
+                return ret;
+            }
+
+            /* There are no dependent requests, but we need to remove our
+             * request from the list of in-flight requests */
+            QLIST_REMOVE(meta, next_in_flight);
+
+            /* The request is finished; release it as qcow2_co_writev does */
+            g_free(meta);
+        }
+
+        /* TODO Preallocate data if requested */
+
+        nb_sectors -= num;
+        offset += num << 9;
+    }
+
+    /*
+     * It is expected that the image file is large enough to actually contain
+     * all of the allocated clusters (otherwise we get failing reads after
+     * EOF). Extend the image to the last allocated sector.
+     */
+    if (host_offset != 0) {
+        uint8_t buf[512];
+        memset(buf, 0, 512);
+        ret = bdrv_write(bs->file, (host_offset >> 9) + num - 1, buf, 1);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+static int qcow2_create2(const char *filename, int64_t total_size,
+ const char *backing_file, const char *backing_format,
+ int flags, size_t cluster_size, int prealloc,
+ QEMUOptionParameter *options, int version)
+{ /*
+ * Create a qcow2 image at filename.
+ *
+ * total_size is given in sectors (it is multiplied by BDRV_SECTOR_SIZE
+ * before being passed to bdrv_truncate()). cluster_size must be a power of
+ * two within [1 << MIN_CLUSTER_BITS, 1 << MAX_CLUSTER_BITS]. flags may carry
+ * BLOCK_FLAG_ENCRYPT and BLOCK_FLAG_LAZY_REFCOUNTS. A non-zero prealloc
+ * runs preallocate() on the finished image. options is forwarded to
+ * bdrv_create_file() only.
+ *
+ * Returns 0 on success, -EINVAL for a bad cluster size, or the negative
+ * error code of the first failing step. Once the file has been opened,
+ * every exit goes through the out label, which calls bdrv_delete(bs).
+ */
+ /* Calculate cluster_bits */
+ int cluster_bits;
+ cluster_bits = ffs(cluster_size) - 1;
+ if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
+ (1 << cluster_bits) != cluster_size)
+ {
+ error_report(
+ "Cluster size must be a power of two between %d and %dk",
+ 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
+ return -EINVAL;
+ }
+
+ /*
+ * Open the image file and write a minimal qcow2 header.
+ *
+ * We keep things simple and start with a zero-sized image. We also
+ * do without refcount blocks or a L1 table for now. We'll fix the
+ * inconsistency later.
+ *
+ * We do need a refcount table because growing the refcount table means
+ * allocating two new refcount blocks - the second of which would be at
+ * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
+ * size for any qcow2 image.
+ */
+ BlockDriverState* bs;
+ QCowHeader header;
+ uint8_t* refcount_table;
+ int ret;
+
+ ret = bdrv_create_file(filename, options);
+ if (ret < 0) {
+ return ret;
+ }
+
+ ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Write the header */
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(version);
+ header.cluster_bits = cpu_to_be32(cluster_bits);
+ header.size = cpu_to_be64(0);
+ header.l1_table_offset = cpu_to_be64(0);
+ header.l1_size = cpu_to_be32(0);
+ header.refcount_table_offset = cpu_to_be64(cluster_size); /* table lives in the second cluster */
+ header.refcount_table_clusters = cpu_to_be32(1);
+ header.refcount_order = cpu_to_be32(3 + REFCOUNT_SHIFT);
+ header.header_length = cpu_to_be32(sizeof(header));
+
+ if (flags & BLOCK_FLAG_ENCRYPT) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+
+ if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
+ header.compatible_features |=
+ cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
+ }
+
+ ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Write an empty refcount table */
+ refcount_table = g_malloc0(cluster_size);
+ ret = bdrv_pwrite(bs, cluster_size, refcount_table, cluster_size);
+ g_free(refcount_table);
+
+ if (ret < 0) {
+ goto out;
+ }
+
+ bdrv_close(bs);
+
+ /*
+ * And now open the image and make it consistent first (i.e. increase the
+ * refcount of the cluster that is occupied by the header and the refcount
+ * table)
+ */
+ BlockDriver* drv = bdrv_find_format("qcow2");
+ assert(drv != NULL);
+ ret = bdrv_open(bs, filename, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = qcow2_alloc_clusters(bs, 2 * cluster_size); /* header cluster + refcount table cluster */
+ if (ret < 0) {
+ goto out;
+
+ } else if (ret != 0) {
+ /* the allocation was expected to land at offset 0 */
+ error_report("Huh, first cluster in empty image is already in use?");
+ abort();
+ }
+
+ /* Okay, now that we have a valid image, let's give it the right size */
+ ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* Want a backing file? There you go.*/
+ if (backing_file) {
+ ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+
+ /* And if we're supposed to preallocate metadata, do that now */
+ if (prealloc) {
+ BDRVQcowState *s = bs->opaque;
+ qemu_co_mutex_lock(&s->lock);
+ ret = preallocate(bs);
+ qemu_co_mutex_unlock(&s->lock);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ bdrv_delete(bs);
+ return ret;
+}
+
+static int qcow2_create(const char *filename, QEMUOptionParameter *options)
+{ /*
+ * Parse the creation options, validate their combination and hand the
+ * result to qcow2_create2().
+ *
+ * Unset options keep the defaults initialised below (size 0, version 2,
+ * DEFAULT_CLUSTER_SIZE, no preallocation, no flags). Returns -EINVAL for an
+ * unknown preallocation mode or compatibility level, for backing file
+ * combined with preallocation, and for lazy refcounts with version < 3;
+ * otherwise returns the result of qcow2_create2().
+ *
+ * NOTE(review): the parsing loop advances options itself, so the pointer
+ * forwarded to qcow2_create2() refers to the terminating entry rather than
+ * to the start of the list - confirm this is intended before changing it.
+ */
+ const char *backing_file = NULL;
+ const char *backing_fmt = NULL;
+ uint64_t sectors = 0;
+ int flags = 0;
+ size_t cluster_size = DEFAULT_CLUSTER_SIZE;
+ int prealloc = 0;
+ int version = 2;
+
+ /* Read out options */
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ sectors = options->value.n / 512; /* size option is in bytes; keep whole sectors */
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
+ backing_fmt = options->value.s;
+ } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
+ flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+ } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
+ if (options->value.n) {
+ cluster_size = options->value.n;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+ if (!options->value.s || !strcmp(options->value.s, "off")) {
+ prealloc = 0;
+ } else if (!strcmp(options->value.s, "metadata")) {
+ prealloc = 1;
+ } else {
+ fprintf(stderr, "Invalid preallocation mode: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
+ if (!options->value.s || !strcmp(options->value.s, "0.10")) {
+ version = 2; /* compat 0.10 maps to header version 2 */
+ } else if (!strcmp(options->value.s, "1.1")) {
+ version = 3; /* compat 1.1 maps to header version 3 */
+ } else {
+ fprintf(stderr, "Invalid compatibility level: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
+ } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
+ flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
+ }
+ options++;
+ }
+
+ if (backing_file && prealloc) {
+ fprintf(stderr, "Backing file and preallocation cannot be used at "
+ "the same time\n");
+ return -EINVAL;
+ }
+
+ if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
+ fprintf(stderr, "Lazy refcounts only supported with compatibility "
+ "level 1.1 and above (use compat=1.1 or greater)\n");
+ return -EINVAL;
+ }
+
+ return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
+ cluster_size, prealloc, options, version);
+}
+
+static int qcow2_make_empty(BlockDriverState *bs)
+{ /*
+ * Driver callback for emptying the image. The only implementation present
+ * is compiled out (#if 0, marked as not correct), so this currently does
+ * nothing and always returns 0.
+ */
+#if 0
+ /* XXX: not correct */
+ BDRVQcowState *s = bs->opaque;
+ uint32_t l1_length = s->l1_size * sizeof(uint64_t);
+ int ret;
+
+ memset(s->l1_table, 0, l1_length);
+ if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
+ return -1;
+ ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ if (ret < 0)
+ return ret;
+
+ l2_cache_reset(bs);
+#endif
+ return 0;
+}
+
+/*
+ * Zero a cluster-aligned range by marking its clusters as zero clusters.
+ *
+ * Returns -ENOTSUP when either the start sector or the sector count is not a
+ * multiple of the cluster size in sectors; otherwise returns the result of
+ * qcow2_zero_clusters(), which is called with s->lock held.
+ */
+static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors)
+{
+    BDRVQcowState *state = bs->opaque;
+    int status;
+
+    /* Misaligned requests are rejected here */
+    if ((sector_num % state->cluster_sectors) != 0) {
+        return -ENOTSUP;
+    }
+    if ((nb_sectors % state->cluster_sectors) != 0) {
+        return -ENOTSUP;
+    }
+
+    /* Aligned requests can use real zero clusters */
+    qemu_co_mutex_lock(&state->lock);
+    status = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+                                 nb_sectors);
+    qemu_co_mutex_unlock(&state->lock);
+
+    return status;
+}
+
+/*
+ * Discard nb_sectors sectors starting at sector_num by calling
+ * qcow2_discard_clusters() with s->lock held; returns its result.
+ */
+static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors)
+{
+    BDRVQcowState *state = bs->opaque;
+    int status;
+
+    qemu_co_mutex_lock(&state->lock);
+    status = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
+                                    nb_sectors);
+    qemu_co_mutex_unlock(&state->lock);
+
+    return status;
+}
+
+/*
+ * Grow the virtual disk to offset bytes.
+ *
+ * Rejected with -EINVAL when offset is not a multiple of 512, and with
+ * -ENOTSUP when the image has snapshots or when offset is smaller than the
+ * current size. Otherwise the L1 table is grown to cover the new size and
+ * the size field of the on-disk header is rewritten.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BDRVQcowState *state = bs->opaque;
+    int64_t l1_entries;
+    int64_t be_size;
+    int status;
+
+    if ((offset & 511) != 0) {
+        error_report("The new size must be a multiple of 512");
+        return -EINVAL;
+    }
+
+    /* cannot proceed if image has snapshots */
+    if (state->nb_snapshots != 0) {
+        error_report("Can't resize an image which has snapshots");
+        return -ENOTSUP;
+    }
+
+    /* shrinking is currently not supported */
+    if (offset < bs->total_sectors * 512) {
+        error_report("qcow2 doesn't support shrinking images yet");
+        return -ENOTSUP;
+    }
+
+    l1_entries = size_to_l1(state, offset);
+    status = qcow2_grow_l1_table(bs, l1_entries, true);
+    if (status < 0) {
+        return status;
+    }
+
+    /* write updated header.size (stored big-endian) */
+    be_size = cpu_to_be64(offset);
+    status = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+                              &be_size, sizeof(uint64_t));
+    if (status < 0) {
+        return status;
+    }
+
+    state->l1_vm_state_index = l1_entries;
+    return 0;
+}
+
+/* XXX: put compressed sectors first, then all the cluster aligned
+   tables to avoid losing bytes in alignment */
+/*
+ * Write one cluster of guest data in compressed form.
+ *
+ * nb_sectors == 0 is a special request: the image file is padded up to the
+ * next 512-byte boundary and nothing else is written. Otherwise nb_sectors
+ * must be exactly one cluster, except for a short final write that ends at
+ * bs->total_sectors, which is zero-padded to a full cluster and retried.
+ * Data that does not shrink when deflated is written uncompressed through
+ * bdrv_write().
+ *
+ * Returns 0 on success, -EINVAL for an unsupported length or a zlib
+ * failure, -EIO when no compressed cluster could be allocated, or the
+ * negative error code of a failing I/O call.
+ */
+static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                                  const uint8_t *buf, int nb_sectors)
+{
+    BDRVQcowState *s = bs->opaque;
+    z_stream strm;
+    int ret, out_len;
+    uint8_t *out_buf;
+    uint64_t cluster_offset;
+
+    if (nb_sectors == 0) {
+        /* align end of file to a sector boundary to ease reading with
+           sector based I/Os */
+        int64_t length = bdrv_getlength(bs->file);
+
+        /* Do not turn an error code into a bogus truncation target */
+        if (length < 0) {
+            return (int)length;
+        }
+        cluster_offset = ((uint64_t)length + 511) & ~511ULL;
+        return bdrv_truncate(bs->file, cluster_offset);
+    }
+
+    if (nb_sectors != s->cluster_sectors) {
+        ret = -EINVAL;
+
+        /* Zero-pad last write if image size is not cluster aligned */
+        if (sector_num + nb_sectors == bs->total_sectors &&
+            nb_sectors < s->cluster_sectors) {
+            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
+            memset(pad_buf, 0, s->cluster_size);
+            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
+            ret = qcow2_write_compressed(bs, sector_num,
+                                         pad_buf, s->cluster_sectors);
+            qemu_vfree(pad_buf);
+        }
+        return ret;
+    }
+
+    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+
+    /* default compression level, 4 KiB window, raw deflate stream
+       (negative windowBits: no zlib header) */
+    memset(&strm, 0, sizeof(strm));
+    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
+                       Z_DEFLATED, -12,
+                       9, Z_DEFAULT_STRATEGY);
+    if (ret != 0) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    strm.avail_in = s->cluster_size;
+    strm.next_in = (uint8_t *)buf;
+    strm.avail_out = s->cluster_size;
+    strm.next_out = out_buf;
+
+    ret = deflate(&strm, Z_FINISH);
+    if (ret != Z_STREAM_END && ret != Z_OK) {
+        deflateEnd(&strm);
+        ret = -EINVAL;
+        goto fail;
+    }
+    out_len = strm.next_out - out_buf;
+
+    deflateEnd(&strm);
+
+    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
+        /* could not compress: write normal cluster */
+        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+        if (ret < 0) {
+            goto fail;
+        }
+    } else {
+        cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
+            sector_num << 9, out_len);
+        if (!cluster_offset) {
+            ret = -EIO;
+            goto fail;
+        }
+        cluster_offset &= s->cluster_offset_mask;
+        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    ret = 0;
+fail:
+    g_free(out_buf);
+    return ret;
+}
+
+/*
+ * Flush the L2 table cache and, when qcow2_need_accurate_refcounts() says
+ * so, the refcount block cache. Both flushes run with s->lock held.
+ *
+ * Returns 0 on success or the negative result of the first failing flush.
+ */
+static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
+{
+    BDRVQcowState *state = bs->opaque;
+    int status;
+
+    qemu_co_mutex_lock(&state->lock);
+
+    status = qcow2_cache_flush(bs, state->l2_table_cache);
+    if (status >= 0 && qcow2_need_accurate_refcounts(state)) {
+        status = qcow2_cache_flush(bs, state->refcount_block_cache);
+    }
+
+    qemu_co_mutex_unlock(&state->lock);
+
+    return status < 0 ? status : 0;
+}
+
+/*
+ * Byte offset at which VM state is stored: l1_vm_state_index shifted left by
+ * (cluster_bits + l2_bits).
+ */
+static int64_t qcow2_vm_state_offset(BDRVQcowState *s)
+{
+    int shift = s->cluster_bits + s->l2_bits;
+    int64_t first_index = s->l1_vm_state_index;
+
+    return first_index << shift;
+}
+
+/*
+ * Fill bdi with the cluster size and the VM state offset of the image.
+ * Always returns 0.
+ */
+static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    BDRVQcowState *state = bs->opaque;
+
+    bdi->vm_state_offset = qcow2_vm_state_offset(state);
+    bdi->cluster_size = state->cluster_size;
+
+    return 0;
+}
+
+#if 0
+/*
+ * Debug helper (compiled out): print runs of consecutive clusters that share
+ * the same refcount, one line per run, as "<first cluster>: refcount=<r>
+ * nb=<run length>".
+ */
+static void dump_refcounts(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    int64_t nb_clusters, k, k1, size;
+    int refcount;
+
+    size = bdrv_getlength(bs->file);
+    nb_clusters = size_to_clusters(s, size);
+    for(k = 0; k < nb_clusters;) {
+        k1 = k;
+        refcount = get_refcount(bs, k);
+        k++;
+        while (k < nb_clusters && get_refcount(bs, k) == refcount)
+            k++;
+        /* report the start of the run (k1), not the index after it */
+        printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k1, refcount,
+               k - k1);
+    }
+}
+#endif
+
+/*
+ * Write the VM state data in qiov at position pos relative to
+ * qcow2_vm_state_offset(). bs->growable is forced to 1 for the duration of
+ * the write and restored afterwards. Returns the result of bdrv_pwritev().
+ */
+static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+                              int64_t pos)
+{
+    BDRVQcowState *state = bs->opaque;
+    int saved_growable = bs->growable;
+    int status;
+
+    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
+
+    bs->growable = 1;
+    status = bdrv_pwritev(bs, qcow2_vm_state_offset(state) + pos, qiov);
+    bs->growable = saved_growable;
+
+    return status;
+}
+
+/*
+ * Read size bytes of VM state into buf from position pos relative to
+ * qcow2_vm_state_offset(). bs->growable is forced to 1 for the duration of
+ * the read and restored afterwards. Returns the result of bdrv_pread().
+ */
+static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+                              int64_t pos, int size)
+{
+    BDRVQcowState *state = bs->opaque;
+    int saved_growable = bs->growable;
+    int status;
+
+    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
+
+    bs->growable = 1;
+    status = bdrv_pread(bs, qcow2_vm_state_offset(state) + pos, buf, size);
+    bs->growable = saved_growable;
+
+    return status;
+}
+
+static QEMUOptionParameter qcow2_create_options[] = { /*
+ * Options understood by qcow2_create(). The trailing { NULL } entry ends
+ * the list: qcow2_create() stops at the first entry whose name is NULL.
+ * Only the cluster size entry carries an explicit default value here.
+ */
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_COMPAT_LEVEL,
+ .type = OPT_STRING,
+ .help = "Compatibility level (0.10 or 1.1)"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = OPT_STRING,
+ .help = "Image format of the base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = OPT_FLAG,
+ .help = "Encrypt the image"
+ },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = OPT_SIZE,
+ .help = "qcow2 cluster size",
+ .value = { .n = DEFAULT_CLUSTER_SIZE },
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, metadata)"
+ },
+ {
+ .name = BLOCK_OPT_LAZY_REFCOUNTS,
+ .type = OPT_FLAG,
+ .help = "Postpone refcount updates",
+ },
+ { NULL } /* list terminator */
+};
+
+static BlockDriver bdrv_qcow2 = { /*
+ * Driver descriptor for the "qcow2" format: binds the block layer callbacks
+ * to the qcow2 implementations. Registered by bdrv_qcow2_init().
+ */
+ .format_name = "qcow2",
+ .instance_size = sizeof(BDRVQcowState), /* size of the per-image state (bs->opaque) */
+ .bdrv_probe = qcow2_probe,
+ .bdrv_open = qcow2_open,
+ .bdrv_close = qcow2_close,
+ .bdrv_reopen_prepare = qcow2_reopen_prepare,
+ .bdrv_create = qcow2_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_is_allocated = qcow2_co_is_allocated,
+ .bdrv_set_key = qcow2_set_key,
+ .bdrv_make_empty = qcow2_make_empty,
+
+ .bdrv_co_readv = qcow2_co_readv,
+ .bdrv_co_writev = qcow2_co_writev,
+ .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
+
+ .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
+ .bdrv_co_discard = qcow2_co_discard,
+ .bdrv_truncate = qcow2_truncate,
+ .bdrv_write_compressed = qcow2_write_compressed,
+
+ .bdrv_snapshot_create = qcow2_snapshot_create,
+ .bdrv_snapshot_goto = qcow2_snapshot_goto,
+ .bdrv_snapshot_delete = qcow2_snapshot_delete,
+ .bdrv_snapshot_list = qcow2_snapshot_list,
+ .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
+ .bdrv_get_info = qcow2_get_info,
+
+ .bdrv_save_vmstate = qcow2_save_vmstate,
+ .bdrv_load_vmstate = qcow2_load_vmstate,
+
+ .bdrv_change_backing_file = qcow2_change_backing_file,
+
+ .bdrv_invalidate_cache = qcow2_invalidate_cache,
+
+ .create_options = qcow2_create_options,
+ .bdrv_check = qcow2_check,
+};
+
+static void bdrv_qcow2_init(void)
+{ /* Register the qcow2 driver descriptor with the block layer. */
+ bdrv_register(&bdrv_qcow2);
+}
+
+block_init(bdrv_qcow2_init); /* presumably schedules the call at block-layer start-up - confirm against block_init */
diff --git a/contrib/qemu/block/qcow2.h b/contrib/qemu/block/qcow2.h
new file mode 100644
index 000000000..3b2d5cda7
--- /dev/null
+++ b/contrib/qemu/block/qcow2.h
@@ -0,0 +1,437 @@
+/*
+ * Block driver for the QCOW version 2 format
+ *
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef BLOCK_QCOW2_H
+#define BLOCK_QCOW2_H
+
+#include "qemu/aes.h"
+#include "block/coroutine.h"
+
+//#define DEBUG_ALLOC
+//#define DEBUG_ALLOC2
+//#define DEBUG_EXT
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
+
+#define QCOW_CRYPT_NONE 0
+#define QCOW_CRYPT_AES 1
+
+#define QCOW_MAX_CRYPT_CLUSTERS 32
+
+/*
+ * The flag constants are unsigned: shifting a signed 1LL into bit 63
+ * overflows the signed type, which C leaves undefined.
+ */
+/* indicate that the refcount of the referenced cluster is exactly one. */
+#define QCOW_OFLAG_COPIED (1ULL << 63)
+/* indicate that the cluster is compressed (they never have the copied flag) */
+#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
+/* The cluster reads as all zeros */
+#define QCOW_OFLAG_ZERO (1ULL << 0)
+
+#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
+
+#define MIN_CLUSTER_BITS 9
+#define MAX_CLUSTER_BITS 21
+
+#define L2_CACHE_SIZE 16
+
+/* Must be at least 4 to cover all cases of refcount table growth */
+#define REFCOUNT_CACHE_SIZE 4
+
+#define DEFAULT_CLUSTER_SIZE 65536
+
+
+#define QCOW2_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define QCOW2_OPT_DISCARD_REQUEST "pass_discard_request"
+#define QCOW2_OPT_DISCARD_SNAPSHOT "pass_discard_snapshot"
+#define QCOW2_OPT_DISCARD_OTHER "pass_discard_other"
+
+typedef struct QCowHeader { /* image header; qcow2_create2() stores every multi-byte field big-endian */
+ uint32_t magic; /* QCOW_MAGIC */
+ uint32_t version; /* qcow2_create() writes 2 or 3 */
+ uint64_t backing_file_offset; /* offset of the backing file name from the start of the header */
+ uint32_t backing_file_size; /* length of the backing file name, no terminating NUL */
+ uint32_t cluster_bits; /* log2 of the cluster size in bytes */
+ uint64_t size; /* in bytes */
+ uint32_t crypt_method; /* QCOW_CRYPT_NONE or QCOW_CRYPT_AES */
+ uint32_t l1_size; /* XXX: save number of clusters instead ? */
+ uint64_t l1_table_offset;
+ uint64_t refcount_table_offset;
+ uint32_t refcount_table_clusters;
+ uint32_t nb_snapshots;
+ uint64_t snapshots_offset;
+
+ /* The following fields are only valid for version >= 3 */
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ uint32_t refcount_order; /* qcow2_create2() writes 3 + REFCOUNT_SHIFT */
+ uint32_t header_length; /* qcow2_create2() writes sizeof(QCowHeader) */
+} QCowHeader;
+
+typedef struct QCowSnapshot { /* one snapshot entry; BDRVQcowState.snapshots points to these */
+ uint64_t l1_table_offset; /* presumably the offset of this snapshot's L1 table - confirm */
+ uint32_t l1_size; /* presumably the number of entries in that L1 table - confirm */
+ char *id_str;
+ char *name;
+ uint64_t disk_size;
+ uint64_t vm_state_size;
+ uint32_t date_sec; /* presumably creation time, seconds part - confirm */
+ uint32_t date_nsec; /* presumably creation time, nanoseconds part - confirm */
+ uint64_t vm_clock_nsec;
+} QCowSnapshot;
+
+struct Qcow2Cache;
+typedef struct Qcow2Cache Qcow2Cache;
+
+typedef struct Qcow2UnknownHeaderExtension { /* header extension kept so qcow2_update_header() can write it back */
+ uint32_t magic; /* extension type identifier */
+ uint32_t len; /* number of bytes in data[] */
+ QLIST_ENTRY(Qcow2UnknownHeaderExtension) next; /* link in BDRVQcowState.unknown_header_ext */
+ uint8_t data[]; /* extension payload (flexible array member) */
+} Qcow2UnknownHeaderExtension;
+
+enum { /* feature classes; presumably the values of Qcow2Feature.type - confirm */
+ QCOW2_FEAT_TYPE_INCOMPATIBLE = 0,
+ QCOW2_FEAT_TYPE_COMPATIBLE = 1,
+ QCOW2_FEAT_TYPE_AUTOCLEAR = 2,
+};
+
+/* Incompatible feature bits */
+enum {
+ QCOW2_INCOMPAT_DIRTY_BITNR = 0, /* bit position */
+ QCOW2_INCOMPAT_DIRTY = 1 << QCOW2_INCOMPAT_DIRTY_BITNR, /* bit mask; tested by qcow2_need_accurate_refcounts() */
+
+ QCOW2_INCOMPAT_MASK = QCOW2_INCOMPAT_DIRTY, /* all incompatible bits defined here */
+};
+
+/* Compatible feature bits */
+enum {
+ QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0, /* bit position */
+ QCOW2_COMPAT_LAZY_REFCOUNTS = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, /* bit mask; set by qcow2_create2() for BLOCK_FLAG_LAZY_REFCOUNTS */
+
+ QCOW2_COMPAT_FEAT_MASK = QCOW2_COMPAT_LAZY_REFCOUNTS, /* all compatible bits defined here */
+};
+
+enum qcow2_discard_type { /* passed to qcow2_free_clusters() and qcow2_free_any_clusters() */
+ QCOW2_DISCARD_NEVER = 0,
+ QCOW2_DISCARD_ALWAYS,
+ QCOW2_DISCARD_REQUEST,
+ QCOW2_DISCARD_SNAPSHOT,
+ QCOW2_DISCARD_OTHER,
+ QCOW2_DISCARD_MAX /* number of values; sizes BDRVQcowState.discard_passthrough[] */
+};
+
+typedef struct Qcow2Feature { /* packed 48-byte record: 1 + 1 + 46 bytes */
+ uint8_t type; /* presumably one of QCOW2_FEAT_TYPE_* - confirm */
+ uint8_t bit; /* presumably the bit number within that feature class - confirm */
+ char name[46];
+} QEMU_PACKED Qcow2Feature;
+
+typedef struct Qcow2DiscardRegion { /* one entry of the BDRVQcowState.discards queue */
+ BlockDriverState *bs;
+ uint64_t offset; /* presumably the start of the region in bytes - confirm */
+ uint64_t bytes; /* length of the region in bytes */
+ QTAILQ_ENTRY(Qcow2DiscardRegion) next;
+} Qcow2DiscardRegion;
+
+typedef struct BDRVQcowState { /* per-image driver state, reached through bs->opaque */
+ int cluster_bits; /* log2 of cluster_size */
+ int cluster_size; /* cluster size in bytes */
+ int cluster_sectors; /* cluster size in 512-byte sectors */
+ int l2_bits;
+ int l2_size; /* offset_to_l2_index() masks with l2_size - 1 */
+ int l1_size;
+ int l1_vm_state_index; /* L1 index from which qcow2_vm_state_offset() is derived */
+ int csize_shift;
+ int csize_mask;
+ uint64_t cluster_offset_mask;
+ uint64_t l1_table_offset;
+ uint64_t *l1_table;
+
+ Qcow2Cache* l2_table_cache;
+ Qcow2Cache* refcount_block_cache;
+
+ uint8_t *cluster_cache;
+ uint8_t *cluster_data;
+ uint64_t cluster_cache_offset;
+ QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs; /* in-flight allocations, linked via next_in_flight */
+
+ uint64_t *refcount_table;
+ uint64_t refcount_table_offset;
+ uint32_t refcount_table_size;
+ int64_t free_cluster_index;
+ int64_t free_byte_offset;
+
+ CoMutex lock; /* held by the coroutine entry points around metadata operations */
+
+ uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ uint32_t crypt_method_header;
+ AES_KEY aes_encrypt_key;
+ AES_KEY aes_decrypt_key;
+ uint64_t snapshots_offset;
+ int snapshots_size;
+ int nb_snapshots; /* non-zero blocks qcow2_truncate() */
+ QCowSnapshot *snapshots;
+
+ int flags;
+ int qcow_version;
+ bool use_lazy_refcounts;
+
+ bool discard_passthrough[QCOW2_DISCARD_MAX]; /* indexed by enum qcow2_discard_type */
+
+ uint64_t incompatible_features;
+ uint64_t compatible_features;
+ uint64_t autoclear_features;
+
+ size_t unknown_header_fields_size;
+ void* unknown_header_fields;
+ QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext; /* written back by qcow2_update_header() */
+ QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
+ bool cache_discards;
+} BDRVQcowState;
+
+/* XXX: use std qcow open function ? */
+typedef struct QCowCreateState { /* NOTE(review): not referenced by the creation code visible here - confirm it is still used */
+ int cluster_size;
+ int cluster_bits;
+ uint16_t *refcount_block;
+ uint64_t *refcount_table;
+ int64_t l1_table_offset;
+ int64_t refcount_table_offset;
+ int64_t refcount_block_offset;
+} QCowCreateState;
+
+struct QCowAIOCB;
+
+typedef struct Qcow2COWRegion { /* one copy-on-write area; used as QCowL2Meta.cow_start and .cow_end */
+ /**
+ * Offset of the COW region in bytes from the start of the first cluster
+ * touched by the request.
+ */
+ uint64_t offset;
+
+ /** Number of sectors to copy */
+ int nb_sectors;
+} Qcow2COWRegion;
+
+/**
+ * Describes an in-flight (part of a) write request that writes to clusters
+ * that are not referenced in their L2 table yet.
+ */
+typedef struct QCowL2Meta
+{
+ /** Guest offset of the first newly allocated cluster */
+ uint64_t offset;
+
+ /** Host offset of the first newly allocated cluster */
+ uint64_t alloc_offset;
+
+ /**
+ * Number of sectors from the start of the first allocated cluster to
+ * the end of the (possibly shortened) request
+ */
+ int nb_available;
+
+ /** Number of newly allocated clusters */
+ int nb_clusters;
+
+ /**
+ * Requests that overlap with this allocation and wait to be restarted
+ * when the allocating request has completed.
+ */
+ CoQueue dependent_requests;
+
+ /**
+ * The COW Region between the start of the first allocated cluster and the
+ * area the guest actually writes to.
+ */
+ Qcow2COWRegion cow_start;
+
+ /**
+ * The COW Region between the area the guest actually writes to and the
+ * end of the last allocated cluster.
+ */
+ Qcow2COWRegion cow_end;
+
+ /** Pointer to next L2Meta of the same write request */
+ struct QCowL2Meta *next;
+
+ QLIST_ENTRY(QCowL2Meta) next_in_flight; /* link in BDRVQcowState.cluster_allocs; removed once the request is done */
+} QCowL2Meta;
+
+enum { /* cluster classification returned by qcow2_get_cluster_type() */
+ QCOW2_CLUSTER_UNALLOCATED, /* no offset bits set in the L2 entry */
+ QCOW2_CLUSTER_NORMAL,
+ QCOW2_CLUSTER_COMPRESSED, /* QCOW_OFLAG_COMPRESSED set */
+ QCOW2_CLUSTER_ZERO /* QCOW_OFLAG_ZERO set */
+};
+
+#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL
+#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
+
+#define REFT_OFFSET_MASK 0xffffffffffffff00ULL
+
+/* Round offset down to the start of the cluster that contains it. */
+static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
+{
+    int64_t cluster_mask = ~(s->cluster_size - 1);
+
+    return offset & cluster_mask;
+}
+
+/* Return the byte position of offset within its cluster. */
+static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
+{
+    int64_t low_bits = s->cluster_size - 1;
+
+    return offset & low_bits;
+}
+
+/* Number of clusters needed to hold size bytes, rounding up. */
+static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
+{
+    int64_t rounded_up = size + (s->cluster_size - 1);
+
+    return rounded_up >> s->cluster_bits;
+}
+
+/*
+ * Number of L1 entries needed to map size bytes, rounding up. One L1 entry
+ * covers 1 << (cluster_bits + l2_bits) bytes.
+ */
+static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
+{
+    int shift = s->cluster_bits + s->l2_bits;
+    uint64_t bytes_per_entry = 1ULL << shift;
+
+    return (size + bytes_per_entry - 1) >> shift;
+}
+
+/* Index within an L2 table of the entry for the cluster containing offset. */
+static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
+{
+    int64_t cluster_number = offset >> s->cluster_bits;
+
+    return cluster_number & (s->l2_size - 1);
+}
+
+/*
+ * Round offset up to the next multiple of n. The mask arithmetic gives that
+ * result only when n is a power of two.
+ */
+static inline int64_t align_offset(int64_t offset, int n)
+{
+    int64_t mask = ~(n - 1);
+
+    return (offset + n - 1) & mask;
+}
+
+/*
+ * Classify an L2 entry as one of the QCOW2_CLUSTER_* values. The compressed
+ * flag is checked first, then the zero flag, then whether any offset bits
+ * are set.
+ */
+static inline int qcow2_get_cluster_type(uint64_t l2_entry)
+{
+    if (l2_entry & QCOW_OFLAG_COMPRESSED) {
+        return QCOW2_CLUSTER_COMPRESSED;
+    }
+    if (l2_entry & QCOW_OFLAG_ZERO) {
+        return QCOW2_CLUSTER_ZERO;
+    }
+    if ((l2_entry & L2E_OFFSET_MASK) == 0) {
+        return QCOW2_CLUSTER_UNALLOCATED;
+    }
+    return QCOW2_CLUSTER_NORMAL;
+}
+
+/*
+ * Check whether refcounts are eager or lazy: returns true unless the image
+ * has the QCOW2_INCOMPAT_DIRTY bit set.
+ */
+static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+{
+    uint64_t dirty = s->incompatible_features & QCOW2_INCOMPAT_DIRTY;
+
+    return dirty == 0;
+}
+
+/* Guest offset at which the leading COW region of m begins. */
+static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
+{
+    uint64_t first_cluster = m->offset;
+
+    return first_cluster + m->cow_start.offset;
+}
+
+/* Guest offset just past the end of the trailing COW region of m. */
+static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
+{
+    uint64_t region_start = m->offset + m->cow_end.offset;
+    uint64_t region_bytes = m->cow_end.nb_sectors << BDRV_SECTOR_BITS;
+
+    return region_start + region_bytes;
+}
+
+// FIXME Need qcow2_ prefix to global functions
+
+/* qcow2.c functions */
+int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t sector_num, int nb_sectors);
+
+int qcow2_mark_dirty(BlockDriverState *bs);
+int qcow2_update_header(BlockDriverState *bs);
+
+/* qcow2-refcount.c functions */
+int qcow2_refcount_init(BlockDriverState *bs);
+void qcow2_refcount_close(BlockDriverState *bs);
+
+int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size);
+int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int nb_clusters);
+int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
+void qcow2_free_clusters(BlockDriverState *bs,
+ int64_t offset, int64_t size,
+ enum qcow2_discard_type type);
+void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
+ int nb_clusters, enum qcow2_discard_type type);
+
+int qcow2_update_snapshot_refcount(BlockDriverState *bs,
+ int64_t l1_table_offset, int l1_size, int addend);
+
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix);
+
+void qcow2_process_discards(BlockDriverState *bs, int ret);
+
+/* qcow2-cluster.c functions */
+int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
+ bool exact_size);
+void qcow2_l2_cache_reset(BlockDriverState *bs);
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
+void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, int enc,
+ const AES_KEY *key);
+
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int *num, uint64_t *cluster_offset);
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m);
+uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+ uint64_t offset,
+ int compressed_size);
+
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
+int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
+ int nb_sectors);
+int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
+
+/* qcow2-snapshot.c functions */
+int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
+int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
+int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
+int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name);
+
+void qcow2_free_snapshots(BlockDriverState *bs);
+int qcow2_read_snapshots(BlockDriverState *bs);
+
+/* qcow2-cache.c functions */
+Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
+int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
+
+void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
+ Qcow2Cache *dependency);
+void qcow2_cache_depends_on_flush(Qcow2Cache *c);
+
+int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
+int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
+ void **table);
+int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
+
+#endif
diff --git a/contrib/qemu/block/qed-check.c b/contrib/qemu/block/qed-check.c
new file mode 100644
index 000000000..b473dcd61
--- /dev/null
+++ b/contrib/qemu/block/qed-check.c
@@ -0,0 +1,248 @@
+/*
+ * QEMU Enhanced Disk Format Consistency Check
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+typedef struct { /* working state of one qed_check() run */
+ BDRVQEDState *s; /* image being checked */
+ BdrvCheckResult *result; /* counters updated as problems are found */
+ bool fix; /* whether to fix invalid offsets */
+
+ uint64_t nclusters; /* number of clusters in the image file, from s->file_size */
+ uint32_t *used_clusters; /* referenced cluster bitmap */
+
+ QEDRequest request; /* holds the L2 table currently being examined */
+} QEDCheck;
+
+/*
+ * Return whether bit n is set in bitmap (32 bits per word). The mask is
+ * built from an unsigned 1 so that bit 31 does not overflow a signed int.
+ */
+static bool qed_test_bit(uint32_t *bitmap, uint64_t n) {
+    return !!(bitmap[n / 32] & (1U << (n % 32)));
+}
+
+/*
+ * Set bit n in bitmap (32 bits per word). The mask is built from an unsigned
+ * 1 so that bit 31 does not overflow a signed int.
+ */
+static void qed_set_bit(uint32_t *bitmap, uint64_t n) {
+    bitmap[n / 32] |= 1U << (n % 32);
+}
+
+/**
+ * Mark a run of clusters as referenced in the check bitmap
+ *
+ * @check:      Check structure
+ * @offset:     Starting offset in bytes
+ * @n:          Number of clusters
+ *
+ * Every cluster that was already marked counts as one corruption and is
+ * added to check->result->corruptions. Returns true when none of the
+ * clusters had been marked before.
+ */
+static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
+                                  unsigned int n)
+{
+    uint64_t first = qed_bytes_to_clusters(check->s, offset);
+    unsigned int already_used = 0;
+    unsigned int done;
+
+    for (done = 0; done < n; done++) {
+        uint64_t cluster = first + done;
+
+        /* Clusters should only be referenced once */
+        if (qed_test_bit(check->used_clusters, cluster)) {
+            already_used++;
+        }
+        qed_set_bit(check->used_clusters, cluster);
+    }
+
+    check->result->corruptions += already_used;
+    return already_used == 0;
+}
+
+/**
+ * Check an L2 table
+ *
+ * Walks every entry of the table. Unallocated and zero entries are skipped.
+ * Every other entry is counted as allocated (and as fragmented when it does
+ * not directly follow the previous data cluster), then validated. Invalid
+ * offsets are cleared when check->fix is set; valid ones are marked used.
+ *
+ * @ret:        Number of invalid cluster offsets
+ */
+static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
+{
+    BDRVQEDState *s = check->s;
+    BdrvCheckResult *res = check->result;
+    unsigned int invalid = 0;
+    uint64_t prev_offset = 0;
+    unsigned int idx;
+
+    for (idx = 0; idx < s->table_nelems; idx++) {
+        uint64_t offset = table->offsets[idx];
+
+        if (qed_offset_is_unalloc_cluster(offset)) {
+            continue;
+        }
+        if (qed_offset_is_zero_cluster(offset)) {
+            continue;
+        }
+
+        res->bfi.allocated_clusters++;
+        if (prev_offset != 0 &&
+            prev_offset + s->header.cluster_size != offset) {
+            res->bfi.fragmented_clusters++;
+        }
+        prev_offset = offset;
+
+        if (qed_check_cluster_offset(s, offset)) {
+            qed_set_used_clusters(check, offset, 1);
+            continue;
+        }
+
+        /* Invalid cluster offset: clear it or report it */
+        if (check->fix) {
+            table->offsets[idx] = 0;
+            res->corruptions_fixed++;
+        } else {
+            res->corruptions++;
+        }
+        invalid++;
+    }
+
+    return invalid;
+}
+
+/**
+ * Descend tables and check each cluster is referenced once only
+ */
+static int qed_check_l1_table(QEDCheck *check, QEDTable *table)
+{ /*
+ * Marks the L1 table's own clusters as used, then visits every allocated
+ * L1 entry: invalid L2 offsets are cleared (fix mode) or counted, valid
+ * ones are marked used, read and checked with qed_check_l2_table().
+ * Repaired tables are written back. I/O failures bump check_errors and do
+ * not stop the scan.
+ *
+ * Returns 0, or the error code of the last failing read or write.
+ */
+ BDRVQEDState *s = check->s;
+ unsigned int i, num_invalid_l1 = 0;
+ int ret, last_error = 0;
+
+ /* Mark L1 table clusters used */
+ qed_set_used_clusters(check, s->header.l1_table_offset,
+ s->header.table_size);
+
+ for (i = 0; i < s->table_nelems; i++) {
+ unsigned int num_invalid_l2;
+ uint64_t offset = table->offsets[i];
+
+ if (qed_offset_is_unalloc_cluster(offset)) {
+ continue;
+ }
+
+ /* Detect invalid L2 offset */
+ if (!qed_check_table_offset(s, offset)) {
+ /* Clear invalid offset */
+ if (check->fix) {
+ table->offsets[i] = 0;
+ check->result->corruptions_fixed++;
+ } else {
+ check->result->corruptions++;
+ }
+
+ num_invalid_l1++;
+ continue;
+ }
+
+ /* a table whose clusters are already referenced was counted as corrupt by the helper */
+ if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
+ continue; /* skip an invalid table */
+ }
+
+ ret = qed_read_l2_table_sync(s, &check->request, offset);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ continue;
+ }
+
+ num_invalid_l2 = qed_check_l2_table(check,
+ check->request.l2_table->table);
+
+ /* Write out fixed L2 table */
+ if (num_invalid_l2 > 0 && check->fix) {
+ ret = qed_write_l2_table_sync(s, &check->request, 0,
+ s->table_nelems, false);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ continue;
+ }
+ }
+ }
+
+ /* Drop reference to final table */
+ qed_unref_l2_cache_entry(check->request.l2_table);
+ check->request.l2_table = NULL;
+
+ /* Write out fixed L1 table */
+ if (num_invalid_l1 > 0 && check->fix) {
+ ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
+ if (ret) {
+ check->result->check_errors++;
+ last_error = ret;
+ }
+ }
+
+ return last_error;
+}
+
+/**
+ * Check for unreferenced (leaked) clusters
+ *
+ * Scans the bitmap from cluster index s->header.header_size up to
+ * check->nclusters and counts every cluster that was never marked used.
+ */
+static void qed_check_for_leaks(QEDCheck *check)
+{
+    BDRVQEDState *s = check->s;
+    uint64_t cluster = s->header.header_size;
+
+    while (cluster < check->nclusters) {
+        if (!qed_test_bit(check->used_clusters, cluster)) {
+            check->result->leaks++;
+        }
+        cluster++;
+    }
+}
+
+/**
+ * Mark an image clean once it passes check or has been repaired
+ *
+ * Nothing is done when unfixable corruptions or check errors were recorded,
+ * or when QED_F_NEED_CHECK is already clear. The flag is cleared only after
+ * a successful flush, so a failed flush leaves the image marked for
+ * checking.
+ */
+static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
+{
+    /* Skip if there were unfixable corruptions or I/O errors */
+    if (result->corruptions > 0 || result->check_errors > 0) {
+        return;
+    }
+
+    /* Skip if image is already marked clean */
+    if (!(s->header.features & QED_F_NEED_CHECK)) {
+        return;
+    }
+
+    /* Ensure fixes reach storage before clearing check bit; if they did
+     * not, keep the bit so the image is checked again later */
+    if (bdrv_flush(s->bs) < 0) {
+        return;
+    }
+
+    s->header.features &= ~QED_F_NEED_CHECK;
+    qed_write_header_sync(s);
+}
+
+int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
+{ /*
+ * Run a consistency check on a QED image, optionally repairing invalid
+ * table entries when fix is true. Counters are accumulated in result.
+ *
+ * Returns the value of qed_check_l1_table(): 0 when every table could be
+ * read (and written, in fix mode), otherwise the last I/O error code.
+ */
+ QEDCheck check = {
+ .s = s,
+ .result = result,
+ .nclusters = qed_bytes_to_clusters(s, s->file_size),
+ .request = { .l2_table = NULL },
+ .fix = fix,
+ };
+ int ret;
+
+ /* one bit per cluster, rounded up to whole 32-bit words, zero-initialised */
+ check.used_clusters = g_malloc0(((check.nclusters + 31) / 32) *
+ sizeof(check.used_clusters[0]));
+
+ /* image size in clusters, rounded up */
+ check.result->bfi.total_clusters =
+ (s->header.image_size + s->header.cluster_size - 1) /
+ s->header.cluster_size;
+ ret = qed_check_l1_table(&check, s->l1_table);
+ if (ret == 0) {
+ /* Only check for leaks if entire image was scanned successfully */
+ qed_check_for_leaks(&check);
+
+ if (fix) {
+ qed_check_mark_clean(s, result);
+ }
+ }
+
+ g_free(check.used_clusters);
+ return ret;
+}
diff --git a/contrib/qemu/block/qed-cluster.c b/contrib/qemu/block/qed-cluster.c
new file mode 100644
index 000000000..f64b2af8f
--- /dev/null
+++ b/contrib/qemu/block/qed-cluster.c
@@ -0,0 +1,165 @@
+/*
+ * QEMU Enhanced Disk Format Cluster functions
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+/**
+ * Measure a run of contiguous clusters in an L2 table
+ *
+ * @s:          QED state
+ * @table:      L2 table
+ * @index:      Index of the first table entry in the run
+ * @n:          Upper bound on the number of entries to examine
+ * @offset:     Receives the table entry at @index
+ * @ret:        Length of the run in entries, counting the entry at @index
+ *
+ * A run is a sequence of unallocated entries, a sequence of zero entries, or
+ * a sequence of allocated entries each exactly one cluster past the previous.
+ * The scan never goes beyond the end of the table.
+ */
+static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
+                                                  QEDTable *table,
+                                                  unsigned int index,
+                                                  unsigned int n,
+                                                  uint64_t *offset)
+{
+    unsigned int limit = MIN(index + n, s->table_nelems);
+    uint64_t prev = table->offsets[index];
+    unsigned int pos = index + 1;
+
+    *offset = prev;
+
+    while (pos < limit) {
+        uint64_t cur = table->offsets[pos];
+
+        if (qed_offset_is_unalloc_cluster(prev)) {
+            /* Run of unallocated entries */
+            if (!qed_offset_is_unalloc_cluster(cur)) {
+                break;
+            }
+        } else if (qed_offset_is_zero_cluster(prev)) {
+            /* Run of zero entries */
+            if (!qed_offset_is_zero_cluster(cur)) {
+                break;
+            }
+        } else {
+            /* Run of allocated entries: must advance by one cluster */
+            if (cur != prev + s->header.cluster_size) {
+                break;
+            }
+            prev = cur;
+        }
+        pos++;
+    }
+    return pos - index;
+}
+
+typedef struct {
+    BDRVQEDState *s;            /* QED state the lookup runs against */
+    uint64_t pos;               /* byte position passed to qed_find_cluster() */
+    size_t len;                 /* byte count, already limited to one L2 table */
+
+    QEDRequest *request;        /* its l2_table field is read on completion */
+
+    /* User callback */
+    QEDFindClusterFunc *cb;
+    void *opaque;               /* passed back to cb unchanged */
+} QEDFindClusterCB;
+
+/**
+ * Completion of the L2 table load started by qed_find_cluster()
+ *
+ * @opaque:     QEDFindClusterCB allocated by qed_find_cluster(); freed here
+ * @ret:        0 if the L2 table was loaded, otherwise the load error
+ *
+ * On success the L2 entry for the requested position is classified and the
+ * user callback receives QED_CLUSTER_L2, QED_CLUSTER_ZERO, QED_CLUSTER_FOUND
+ * or -EINVAL together with the first offset and the length of the contiguous
+ * run.  On a load error the callback receives @ret with offset and length 0.
+ */
+static void qed_find_cluster_cb(void *opaque, int ret)
+{
+    QEDFindClusterCB *find_cluster_cb = opaque;
+    BDRVQEDState *s = find_cluster_cb->s;
+    QEDRequest *request = find_cluster_cb->request;
+    uint64_t offset = 0;
+    size_t len = 0;
+    unsigned int index;
+    unsigned int n;
+
+    if (ret) {
+        goto out;
+    }
+
+    index = qed_l2_index(s, find_cluster_cb->pos);
+    n = qed_bytes_to_clusters(s,
+                              qed_offset_into_cluster(s, find_cluster_cb->pos) +
+                              find_cluster_cb->len);
+    n = qed_count_contiguous_clusters(s, request->l2_table->table,
+                                      index, n, &offset);
+
+    if (qed_offset_is_unalloc_cluster(offset)) {
+        ret = QED_CLUSTER_L2;
+    } else if (qed_offset_is_zero_cluster(offset)) {
+        ret = QED_CLUSTER_ZERO;
+    } else if (qed_check_cluster_offset(s, offset)) {
+        ret = QED_CLUSTER_FOUND;
+    } else {
+        ret = -EINVAL;
+    }
+
+    /* Widen before multiplying: n and cluster_size are both 32-bit, so the
+     * byte length of a run of 4 GB or more would otherwise wrap around.
+     */
+    len = MIN(find_cluster_cb->len, (uint64_t)n * s->header.cluster_size -
+              qed_offset_into_cluster(s, find_cluster_cb->pos));
+
+out:
+    find_cluster_cb->cb(find_cluster_cb->opaque, ret, offset, len);
+    g_free(find_cluster_cb);
+}
+
+/**
+ * Translate a device position into an image file offset
+ *
+ * @s:          QED state
+ * @request:    L2 cache entry
+ * @pos:        Byte position in device
+ * @len:        Number of bytes
+ * @cb:         Completion function
+ * @opaque:     User data for completion function
+ *
+ * The result (cluster offset, or the extent of an unallocated range) is
+ * delivered through @cb.  @len is first clipped so the lookup never crosses
+ * into a second L2 table.
+ *
+ * When an L2 table exists, request->l2_table refers to its cache entry after
+ * the lookup and the caller owns that reference; keeping it avoids loading
+ * the table again later in the request.  A non-NULL request->l2_table is
+ * unreferenced before it is replaced with the new entry.
+ */
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+                      size_t len, QEDFindClusterFunc *cb, void *opaque)
+{
+    QEDFindClusterCB *lookup;
+    uint64_t l2_boundary = ((pos >> s->l1_shift) + 1) << s->l1_shift;
+    uint64_t l2_offset;
+
+    /* One request works on one L2 table: stop at the next L2 boundary */
+    len = MIN(len, l2_boundary - pos);
+
+    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
+
+    if (qed_offset_is_unalloc_cluster(l2_offset)) {
+        /* No L2 table yet, so the whole clipped range is unallocated */
+        cb(opaque, QED_CLUSTER_L1, 0, len);
+        return;
+    }
+
+    if (!qed_check_table_offset(s, l2_offset)) {
+        cb(opaque, -EINVAL, 0, 0);
+        return;
+    }
+
+    lookup = g_malloc(sizeof(*lookup));
+    *lookup = (QEDFindClusterCB) {
+        .s = s,
+        .pos = pos,
+        .len = len,
+        .request = request,
+        .cb = cb,
+        .opaque = opaque,
+    };
+
+    qed_read_l2_table(s, request, l2_offset, qed_find_cluster_cb, lookup);
+}
diff --git a/contrib/qemu/block/qed-gencb.c b/contrib/qemu/block/qed-gencb.c
new file mode 100644
index 000000000..7d7ac1ffc
--- /dev/null
+++ b/contrib/qemu/block/qed-gencb.c
@@ -0,0 +1,32 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qed.h"
+
+/* Allocate @len bytes that begin with a GenericCB holding @cb and @opaque */
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque)
+{
+    GenericCB *header = g_malloc(len);
+
+    header->opaque = opaque;
+    header->cb = cb;
+    return header;
+}
+
+/* Free a gencb_alloc() allocation, then call its stored callback with @ret */
+void gencb_complete(void *opaque, int ret)
+{
+    GenericCB *header = opaque;
+    void *cb_opaque = header->opaque;
+    BlockDriverCompletionFunc *user_cb = header->cb;
+
+    /* Both fields were copied out above, so freeing first is safe */
+    g_free(header);
+    user_cb(cb_opaque, ret);
+}
diff --git a/contrib/qemu/block/qed-l2-cache.c b/contrib/qemu/block/qed-l2-cache.c
new file mode 100644
index 000000000..e9b2aae44
--- /dev/null
+++ b/contrib/qemu/block/qed-l2-cache.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU Enhanced Disk Format L2 Cache
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+/*
+ * L2 table cache usage is as follows:
+ *
+ * An open image has one L2 table cache that is used to avoid accessing the
+ * image file for recently referenced L2 tables.
+ *
+ * Cluster offset lookup translates the logical offset within the block device
+ * to a cluster offset within the image file. This is done by indexing into
+ * the L1 and L2 tables which store cluster offsets. It is here where the L2
+ * table cache serves up recently referenced L2 tables.
+ *
+ * If there is a cache miss, that L2 table is read from the image file and
+ * committed to the cache. Subsequent accesses to that L2 table will be served
+ * from the cache until the table is evicted from the cache.
+ *
+ * L2 tables are also committed to the cache when new L2 tables are allocated
+ * in the image file. Since the L2 table cache is write-through, the new L2
+ * table is first written out to the image file and then committed to the
+ * cache.
+ *
+ * Multiple I/O requests may be using an L2 table cache entry at any given
+ * time. That means an entry may be in use across several requests and
+ * reference counting is needed to free the entry at the correct time. In
+ * particular, an entry evicted from the cache will only be freed once all
+ * references are dropped.
+ *
+ * An in-flight I/O request will hold a reference to a L2 table cache entry for
+ * the period during which it needs to access the L2 table. This includes
+ * cluster offset lookup, L2 table allocation, and L2 table update when a new
+ * data cluster has been allocated.
+ *
+ * An interesting case occurs when two requests need to access an L2 table that
+ * is not in the cache. Since the operation to read the table from the image
+ * file takes some time to complete, both requests may see a cache miss and
+ * start reading the L2 table from the image file. The first to finish will
+ * commit its L2 table into the cache.  When the second tries to commit, its
+ * table will be deleted in favor of the existing cache entry.
+ */
+
+#include "trace.h"
+#include "qed.h"
+
+/* Each L2 holds 2GB so this lets us fully cache a 100GB disk */
+#define MAX_L2_CACHE_SIZE 50
+
+/**
+ * Set up an empty L2 cache: no entries, empty list
+ */
+void qed_init_l2_cache(L2TableCache *l2_cache)
+{
+    l2_cache->n_entries = 0;
+    QTAILQ_INIT(&l2_cache->entries);
+}
+
+/**
+ * Free the L2 cache
+ *
+ * Every entry on the list and its table are released regardless of the
+ * entry's reference count.  The list head and n_entries are left untouched,
+ * so the cache must be re-initialized before it is used again.
+ */
+void qed_free_l2_cache(L2TableCache *l2_cache)
+{
+    CachedL2Table *entry, *next_entry;
+
+    QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) {
+        qemu_vfree(entry->table);
+        g_free(entry);
+    }
+}
+
+/**
+ * Create a new entry that is not yet part of the cache
+ *
+ * The entry comes back zeroed except for a reference count of 1, which
+ * belongs to the caller.  The caller still has to provide the table field,
+ * using memory that qemu_vfree() can release.
+ */
+CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
+{
+    CachedL2Table *fresh = g_malloc0(sizeof(*fresh));
+
+    fresh->ref = 1;
+    trace_qed_alloc_l2_cache_entry(l2_cache, fresh);
+    return fresh;
+}
+
+/**
+ * Drop one reference to an entry; the entry and its table are freed when the
+ * count reaches zero.  A NULL entry is ignored.
+ */
+void qed_unref_l2_cache_entry(CachedL2Table *entry)
+{
+    if (entry == NULL) {
+        return;
+    }
+
+    entry->ref -= 1;
+    trace_qed_unref_l2_cache_entry(entry, entry->ref);
+
+    if (entry->ref != 0) {
+        return;
+    }
+
+    qemu_vfree(entry->table);
+    g_free(entry);
+}
+
+/**
+ * Look up the cached L2 table stored at @offset
+ *
+ * Returns NULL on a cache miss, which the caller has to satisfy itself.  On a
+ * hit the entry's reference count is raised by one before it is returned.
+ */
+CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
+{
+    CachedL2Table *candidate;
+    CachedL2Table *found = NULL;
+
+    QTAILQ_FOREACH(candidate, &l2_cache->entries, node) {
+        if (candidate->offset != offset) {
+            continue;
+        }
+
+        /* The trace point records the count before this lookup's reference */
+        trace_qed_find_l2_cache_entry(l2_cache, candidate, offset,
+                                      candidate->ref);
+        candidate->ref++;
+        found = candidate;
+        break;
+    }
+    return found;
+}
+
+/**
+ * Commit an L2 cache entry into the cache.  This is meant to be used as part of
+ * the process to satisfy a cache miss.  A caller would allocate an entry which
+ * is not actually in the L2 cache and then once the entry was valid and
+ * present on disk, the entry can be committed into the cache.
+ *
+ * Since the cache is write-through, it's important that this function is not
+ * called until the entry is present on disk and the L1 has been updated to
+ * point to the entry.
+ *
+ * N.B. This function steals a reference to the l2_table from the caller so the
+ * caller must obtain a new reference by issuing a call to
+ * qed_find_l2_cache_entry().
+ *
+ * If an entry with the same offset is already cached, @l2_table is
+ * unreferenced instead of inserted and the existing entry stays in place.
+ */
+void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
+{
+    CachedL2Table *entry;
+
+    entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset);
+    if (entry) {
+        qed_unref_l2_cache_entry(entry);    /* undo the lookup's reference */
+        qed_unref_l2_cache_entry(l2_table); /* drop the stolen reference */
+        return;
+    }
+
+    /* Evict an unused cache entry so we have space.  If all entries are in use
+     * we can grow the cache temporarily and we try to shrink back down later.
+     */
+    if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
+        CachedL2Table *next;
+        QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
+            if (entry->ref > 1) {
+                continue;   /* referenced by more than the cache itself */
+            }
+
+            QTAILQ_REMOVE(&l2_cache->entries, entry, node);
+            l2_cache->n_entries--;
+            qed_unref_l2_cache_entry(entry);
+
+            /* Stop evicting when we've shrunk back to max size */
+            if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
+                break;
+            }
+        }
+    }
+
+    l2_cache->n_entries++;
+    QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node);
+}
diff --git a/contrib/qemu/block/qed-table.c b/contrib/qemu/block/qed-table.c
new file mode 100644
index 000000000..76d2dcccf
--- /dev/null
+++ b/contrib/qemu/block/qed-table.c
@@ -0,0 +1,296 @@
+/*
+ * QEMU Enhanced Disk Format Table I/O
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
+#include "qed.h"
+
+typedef struct {
+    GenericCB gencb;        /* first member: completed via gencb_complete() */
+    BDRVQEDState *s;
+    QEDTable *table;        /* destination; byteswapped in place after read */
+
+    struct iovec iov;       /* single element describing the table buffer */
+    QEMUIOVector qiov;      /* wraps iov for bdrv_aio_readv() */
+} QEDReadTableCB;
+
+/* Completion of qed_read_table(): convert the loaded offsets to CPU order */
+static void qed_read_table_cb(void *opaque, int ret)
+{
+    QEDReadTableCB *read_cb = opaque;
+
+    if (ret == 0) {
+        /* The buffer holds little-endian offsets; swap them in place */
+        QEDTable *table = read_cb->table;
+        int count = read_cb->qiov.size / sizeof(uint64_t);
+        int idx;
+
+        for (idx = 0; idx < count; idx++) {
+            table->offsets[idx] = le64_to_cpu(table->offsets[idx]);
+        }
+    }
+
+    /* Reached for both success and I/O error */
+    trace_qed_read_table_cb(read_cb->s, read_cb->table, ret);
+    gencb_complete(&read_cb->gencb, ret);
+}
+
+/**
+ * Start an asynchronous read of a complete table from the image file
+ *
+ * @s:          QED state
+ * @offset:     Offset of table in image file, in bytes
+ * @table:      Destination; receives cluster_size * table_size bytes
+ * @cb:         Completion function
+ * @opaque:     Argument for completion function
+ *
+ * The offsets are converted to CPU byte order before @cb is invoked.
+ */
+static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+                           BlockDriverCompletionFunc *cb, void *opaque)
+{
+    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
+                                                cb, opaque);
+    QEMUIOVector *qiov = &read_table_cb->qiov;
+
+    trace_qed_read_table(s, offset, table);
+
+    read_table_cb->s = s;
+    read_table_cb->table = table;
+    /* These two statements used to end in ',' and were silently chained with
+     * the call below through the comma operator.
+     */
+    read_table_cb->iov.iov_base = table->offsets;
+    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size;
+
+    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
+    bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+                   qiov->size / BDRV_SECTOR_SIZE,
+                   qed_read_table_cb, read_table_cb);
+}
+
+typedef struct {
+    GenericCB gencb;        /* first member: completed via gencb_complete() */
+    BDRVQEDState *s;
+    QEDTable *orig_table;   /* caller's table; only passed to the trace point */
+    QEDTable *table;        /* little-endian bounce copy, freed on completion */
+    bool flush;             /* flush after write? */
+
+    struct iovec iov;       /* single element describing the bounce copy */
+    QEMUIOVector qiov;      /* wraps iov for bdrv_aio_writev() */
+} QEDWriteTableCB;
+
+/*
+ * Completion of the table write and, if requested, of the flush after it.
+ * Called once after the write; when a flush was requested and the write
+ * succeeded it is called a second time after the flush.
+ */
+static void qed_write_table_cb(void *opaque, int ret)
+{
+    QEDWriteTableCB *wcb = opaque;
+
+    trace_qed_write_table_cb(wcb->s, wcb->orig_table, wcb->flush, ret);
+
+    if (ret == 0 && wcb->flush) {
+        /* Write is done; clear the flag so the flush completion finishes */
+        wcb->flush = false;
+        bdrv_aio_flush(wcb->s->bs, qed_write_table_cb, wcb);
+        return;
+    }
+
+    qemu_vfree(wcb->table);
+    gencb_complete(&wcb->gencb, ret);
+}
+
+/**
+ * Write part or all of a table back to the image file
+ *
+ * @s:          QED state
+ * @offset:     Offset of table in image file, in bytes
+ * @table:      Table
+ * @index:      Index of first element
+ * @n:          Number of elements
+ * @flush:      Whether or not to sync to disk
+ * @cb:         Completion function
+ * @opaque:     Argument for completion function
+ *
+ * The element range is widened to whole sectors and written from a
+ * little-endian copy, so @table itself is not modified.
+ */
+static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
+                            unsigned int index, unsigned int n, bool flush,
+                            BlockDriverCompletionFunc *cb, void *opaque)
+{
+    /* Mask selecting an element's position within its sector */
+    unsigned int in_sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
+    /* First element and one past the last, rounded out to sector bounds */
+    unsigned int first = index & ~in_sector_mask;
+    unsigned int last = (index + n + in_sector_mask) & ~in_sector_mask;
+    size_t nbytes = (last - first) * sizeof(uint64_t);
+    QEDWriteTableCB *wcb;
+    unsigned int k;
+
+    trace_qed_write_table(s, offset, table, index, n);
+
+    wcb = gencb_alloc(sizeof(*wcb), cb, opaque);
+    wcb->s = s;
+    wcb->orig_table = table;
+    wcb->flush = flush;
+    wcb->table = qemu_blockalign(s->bs, nbytes);
+    wcb->iov.iov_base = wcb->table->offsets;
+    wcb->iov.iov_len = nbytes;
+    qemu_iovec_init_external(&wcb->qiov, &wcb->iov, 1);
+
+    /* Fill the copy with little-endian offsets */
+    for (k = first; k < last; k++) {
+        wcb->table->offsets[k - first] = cpu_to_le64(table->offsets[k]);
+    }
+
+    /* Move the file offset to the first element being written */
+    offset += first * sizeof(uint64_t);
+
+    bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+                    &wcb->qiov, wcb->qiov.size / BDRV_SECTOR_SIZE,
+                    qed_write_table_cb, wcb);
+}
+
+/**
+ * Completion function for the *_sync() wrappers: stores @ret in the int that
+ * @opaque points to
+ */
+static void qed_sync_cb(void *opaque, int ret)
+{
+    int *result = opaque;
+
+    *result = ret;
+}
+
+/* Read the L1 table into s->l1_table and wait for the read to complete */
+int qed_read_l1_table_sync(BDRVQEDState *s)
+{
+    /* qed_sync_cb() overwrites this once the read has completed */
+    int status = -EINPROGRESS;
+
+    qed_read_table(s, s->header.l1_table_offset, s->l1_table,
+                   qed_sync_cb, &status);
+
+    while (status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return status;
+}
+
+/* Start writing @n L1 table elements from @index; never requests a flush */
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+                        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    uint64_t l1_offset;
+
+    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
+
+    l1_offset = s->header.l1_table_offset;
+    qed_write_table(s, l1_offset, s->l1_table, index, n, false, cb, opaque);
+}
+
+/* Write @n L1 table elements from @index and wait for completion */
+int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
+                            unsigned int n)
+{
+    /* qed_sync_cb() overwrites this once the write has completed */
+    int status = -EINPROGRESS;
+
+    qed_write_l1_table(s, index, n, qed_sync_cb, &status);
+
+    while (status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return status;
+}
+
+typedef struct {
+    GenericCB gencb;        /* first member: completed via gencb_complete() */
+    BDRVQEDState *s;
+    uint64_t l2_offset;     /* image file offset the table is read from */
+    QEDRequest *request;    /* its l2_table holds the entry being filled */
+} QEDReadL2TableCB;
+
+/*
+ * Completion of the table read started by qed_read_l2_table(): on success the
+ * new entry is committed to the cache, on error it is dropped.
+ */
+static void qed_read_l2_table_cb(void *opaque, int ret)
+{
+    QEDReadL2TableCB *l2_cb = opaque;
+    QEDRequest *request = l2_cb->request;
+    BDRVQEDState *s = l2_cb->s;
+    CachedL2Table *loaded = request->l2_table;
+    uint64_t l2_offset = l2_cb->l2_offset;
+
+    if (ret == 0) {
+        loaded->offset = l2_offset;
+
+        /* Commit takes over our reference, so look the entry up again */
+        qed_commit_l2_cache_entry(&s->l2_cache, loaded);
+
+        /* Cannot miss: an entry for this offset was committed just above */
+        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
+        assert(request->l2_table != NULL);
+    } else {
+        /* The read failed, so the table contents cannot be trusted */
+        qed_unref_l2_cache_entry(loaded);
+        request->l2_table = NULL;
+    }
+
+    gencb_complete(&l2_cb->gencb, ret);
+}
+
+/*
+ * Make request->l2_table refer to the L2 table stored at @offset.
+ *
+ * Any entry the request already holds is unreferenced first.  A cache hit
+ * invokes @cb with 0 before returning; a miss allocates a new entry and
+ * starts reading the table from the image file.
+ */
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+                       BlockDriverCompletionFunc *cb, void *opaque)
+{
+    QEDReadL2TableCB *l2_cb;
+    CachedL2Table *entry;
+
+    qed_unref_l2_cache_entry(request->l2_table);
+
+    /* Served from the cache when possible */
+    entry = qed_find_l2_cache_entry(&s->l2_cache, offset);
+    request->l2_table = entry;
+    if (entry) {
+        cb(opaque, 0);
+        return;
+    }
+
+    /* Cache miss: build an entry and fill it from the image file */
+    entry = qed_alloc_l2_cache_entry(&s->l2_cache);
+    request->l2_table = entry;
+    entry->table = qed_alloc_table(s);
+
+    l2_cb = gencb_alloc(sizeof(*l2_cb), cb, opaque);
+    l2_cb->s = s;
+    l2_cb->l2_offset = offset;
+    l2_cb->request = request;
+
+    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
+    qed_read_table(s, offset, request->l2_table->table,
+                   qed_read_l2_table_cb, l2_cb);
+}
+
+/* Run qed_read_l2_table() and wait for its completion callback */
+int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
+{
+    /* qed_sync_cb() overwrites this once the lookup or read has completed */
+    int status = -EINPROGRESS;
+
+    qed_read_l2_table(s, request, offset, qed_sync_cb, &status);
+
+    while (status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return status;
+}
+
+/* Start writing @n elements from @index of the request's current L2 table */
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+                        unsigned int index, unsigned int n, bool flush,
+                        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    CachedL2Table *l2;
+
+    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
+
+    l2 = request->l2_table;
+    qed_write_table(s, l2->offset, l2->table, index, n, flush, cb, opaque);
+}
+
+/* Run qed_write_l2_table() and wait for its completion callback */
+int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+                            unsigned int index, unsigned int n, bool flush)
+{
+    /* qed_sync_cb() overwrites this once the write has completed */
+    int status = -EINPROGRESS;
+
+    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &status);
+
+    while (status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return status;
+}
diff --git a/contrib/qemu/block/qed.c b/contrib/qemu/block/qed.c
new file mode 100644
index 000000000..f767b0528
--- /dev/null
+++ b/contrib/qemu/block/qed.c
@@ -0,0 +1,1596 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/timer.h"
+#include "trace.h"
+#include "qed.h"
+#include "qapi/qmp/qerror.h"
+#include "migration/migration.h"
+
+/*
+ * Cancel callback: the request is not aborted; this waits until the flag
+ * that acb->finished points at becomes true.
+ */
+static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+    bool done = false;
+
+    /* Publish the flag through the request, then wait for it to be set */
+    acb->finished = &done;
+    while (!done) {
+        qemu_aio_wait();
+    }
+}
+
+static const AIOCBInfo qed_aiocb_info = {
+    .aiocb_size         = sizeof(QEDAIOCB),     /* size of one QED request */
+    .cancel             = qed_aio_cancel,       /* waits for completion */
+};
+
+/*
+ * Format probe: returns 100 when @buf is large enough for a QED header and
+ * starts with the QED magic, 0 otherwise.
+ */
+static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
+                          const char *filename)
+{
+    const QEDHeader *header = (const QEDHeader *)buf;
+
+    if (buf_size < sizeof(*header) ||
+        le32_to_cpu(header->magic) != QED_MAGIC) {
+        return 0;
+    }
+    return 100;
+}
+
+/**
+ * Tell whether a backing file format name is exactly "raw"
+ *
+ * @fmt:        Backing file format, may be NULL (treated as not raw)
+ */
+static bool qed_fmt_is_raw(const char *fmt)
+{
+    if (fmt == NULL) {
+        return false;
+    }
+    return strcmp(fmt, "raw") == 0;
+}
+
+/* Convert a little-endian header (@le) into CPU byte order (@cpu) */
+static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
+{
+    /* 32-bit fields */
+    cpu->magic = le32_to_cpu(le->magic);
+    cpu->cluster_size = le32_to_cpu(le->cluster_size);
+    cpu->table_size = le32_to_cpu(le->table_size);
+    cpu->header_size = le32_to_cpu(le->header_size);
+    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
+    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
+
+    /* 64-bit fields */
+    cpu->features = le64_to_cpu(le->features);
+    cpu->compat_features = le64_to_cpu(le->compat_features);
+    cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
+    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
+    cpu->image_size = le64_to_cpu(le->image_size);
+}
+
+/* Convert a CPU byte order header (@cpu) into little-endian form (@le) */
+static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
+{
+    /* 32-bit fields */
+    le->magic = cpu_to_le32(cpu->magic);
+    le->cluster_size = cpu_to_le32(cpu->cluster_size);
+    le->table_size = cpu_to_le32(cpu->table_size);
+    le->header_size = cpu_to_le32(cpu->header_size);
+    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
+    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
+
+    /* 64-bit fields */
+    le->features = cpu_to_le64(cpu->features);
+    le->compat_features = cpu_to_le64(cpu->compat_features);
+    le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
+    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
+    le->image_size = cpu_to_le64(cpu->image_size);
+}
+
+/**
+ * Synchronously write the in-memory header to offset 0 of the image file
+ *
+ * @s:          QED state whose header is written
+ * @ret:        0 on success, -errno on failure; -EIO when fewer than
+ *              sizeof(QEDHeader) bytes were written
+ */
+int qed_write_header_sync(BDRVQEDState *s)
+{
+    QEDHeader le;
+    int ret;
+
+    qed_header_cpu_to_le(&s->header, &le);
+    ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
+    if (ret < 0) {
+        return ret;
+    }
+    if (ret != sizeof(le)) {
+        /* A short write must not be reported as a byte count, and a write
+         * of 0 bytes must not be mistaken for success.
+         */
+        return -EIO;
+    }
+    return 0;
+}
+
+typedef struct {
+    GenericCB gencb;        /* first member: completed via gencb_complete() */
+    BDRVQEDState *s;
+    struct iovec iov;       /* single element describing buf */
+    QEMUIOVector qiov;      /* used for both the read and the write back */
+    int nsectors;           /* sectors spanned by the header */
+    uint8_t *buf;           /* sector buffer, freed in qed_write_header_cb() */
+} QEDWriteHeaderCB;
+
+/* Last step of qed_write_header(): free the buffer and report @ret */
+static void qed_write_header_cb(void *opaque, int ret)
+{
+    QEDWriteHeaderCB *hdr_cb = opaque;
+
+    qemu_vfree(hdr_cb->buf);
+    /* gencb is the first member, so this is the pointer gencb_alloc() gave */
+    gencb_complete(&hdr_cb->gencb, ret);
+}
+
+/*
+ * Middle step of qed_write_header(): the header sectors have been read, so
+ * overwrite the header fields in the buffer and write the sectors back.
+ */
+static void qed_write_header_read_cb(void *opaque, int ret)
+{
+    QEDWriteHeaderCB *hdr_cb = opaque;
+    BDRVQEDState *s = hdr_cb->s;
+
+    if (ret == 0) {
+        /* Only the leading QEDHeader bytes of the buffer are replaced */
+        qed_header_cpu_to_le(&s->header, (QEDHeader *)hdr_cb->buf);
+
+        bdrv_aio_writev(s->bs->file, 0, &hdr_cb->qiov, hdr_cb->nsectors,
+                        qed_write_header_cb, hdr_cb);
+    } else {
+        /* Read failed: finish right away with the error */
+        qed_write_header_cb(hdr_cb, ret);
+    }
+}
+
+/**
+ * Update header in-place (does not rewrite backing filename or other strings)
+ *
+ * Only the known header fields are rewritten; whatever follows the QED header
+ * in the same sectors is preserved.
+ */
+static void qed_write_header(BDRVQEDState *s, BlockDriverCompletionFunc cb,
+                             void *opaque)
+{
+    /* Full sectors must be written for O_DIRECT, yet the bytes after the
+     * header cannot always be regenerated (an unrecognized compat feature may
+     * own them).  So the sectors holding the header are read, patched and
+     * written back.
+     */
+    int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
+                   BDRV_SECTOR_SIZE;
+    size_t buf_len = nsectors * BDRV_SECTOR_SIZE;
+    QEDWriteHeaderCB *hdr_cb = gencb_alloc(sizeof(*hdr_cb), cb, opaque);
+
+    hdr_cb->s = s;
+    hdr_cb->nsectors = nsectors;
+    hdr_cb->buf = qemu_blockalign(s->bs, buf_len);
+    hdr_cb->iov.iov_base = hdr_cb->buf;
+    hdr_cb->iov.iov_len = buf_len;
+    qemu_iovec_init_external(&hdr_cb->qiov, &hdr_cb->iov, 1);
+
+    bdrv_aio_readv(s->bs->file, 0, &hdr_cb->qiov, nsectors,
+                   qed_write_header_read_cb, hdr_cb);
+}
+
+/**
+ * Largest image size addressable with the given table geometry
+ *
+ * @cluster_size:   Cluster size in bytes
+ * @table_size:     Table size in clusters
+ * @ret:            Maximum image size in bytes, saturated at UINT64_MAX
+ *
+ * One table holds table_size * cluster_size / 8 entries, one L2 table maps
+ * that many clusters, and the L1 table refers to that many L2 tables.  For
+ * large geometries (e.g. 64 MB clusters with 16-cluster tables) the product
+ * needs more than 64 bits; it used to wrap around, possibly to 0, so that
+ * valid images were rejected as too large.
+ */
+static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
+{
+    uint64_t table_entries;
+    uint64_t l2_size;
+
+    table_entries = ((uint64_t)table_size * cluster_size) / sizeof(uint64_t);
+    l2_size = table_entries * cluster_size;
+
+    /* Saturate instead of wrapping when the product exceeds 64 bits */
+    if (table_entries != 0 && l2_size > UINT64_MAX / table_entries) {
+        return UINT64_MAX;
+    }
+    return l2_size * table_entries;
+}
+
+/* A cluster size is valid when it is within the QED limits and a power of 2 */
+static bool qed_is_cluster_size_valid(uint32_t cluster_size)
+{
+    return cluster_size >= QED_MIN_CLUSTER_SIZE &&
+           cluster_size <= QED_MAX_CLUSTER_SIZE &&
+           (cluster_size & (cluster_size - 1)) == 0;
+}
+
+/* A table size is valid when it is within the QED limits and a power of 2 */
+static bool qed_is_table_size_valid(uint32_t table_size)
+{
+    return table_size >= QED_MIN_TABLE_SIZE &&
+           table_size <= QED_MAX_TABLE_SIZE &&
+           (table_size & (table_size - 1)) == 0;
+}
+
+/*
+ * An image size is valid when it is a multiple of the sector size and does
+ * not exceed what the given cluster and table sizes can address.
+ */
+static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
+                                    uint32_t table_size)
+{
+    return image_size % BDRV_SECTOR_SIZE == 0 &&
+           image_size <= qed_max_image_size(cluster_size, table_size);
+}
+
+/**
+ * Read a string of known length from the image file
+ *
+ * @file:       Image file
+ * @offset:     File offset to start of string, in bytes
+ * @n:          String length in bytes
+ * @buf:        Destination buffer
+ * @buflen:     Destination buffer length in bytes
+ * @ret:        0 on success, -errno on failure
+ *
+ * On success @buf holds the @n bytes followed by a NUL terminator.  -EINVAL
+ * is returned, without reading, when @buflen has no room for both.
+ */
+static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
+                           char *buf, size_t buflen)
+{
+    int rc;
+
+    /* Need space for n bytes plus the terminator */
+    if (buflen <= n) {
+        return -EINVAL;
+    }
+
+    rc = bdrv_pread(file, offset, buf, n);
+    if (rc >= 0) {
+        buf[n] = '\0';
+        rc = 0;
+    }
+    return rc;
+}
+
+/**
+ * Allocate new clusters
+ *
+ * @s:          QED state
+ * @n:          Number of contiguous clusters to allocate
+ * @ret:        Offset of first allocated cluster
+ *
+ * This function only produces the offset where the new clusters should be
+ * written.  It updates BDRVQEDState but does not make any changes to the image
+ * file.
+ */
+static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
+{
+    uint64_t offset = s->file_size;
+
+    /* Widen before multiplying: n and cluster_size are both 32-bit, so the
+     * byte count of an allocation of 4 GB or more would otherwise wrap.
+     */
+    s->file_size += (uint64_t)n * s->header.cluster_size;
+    return offset;
+}
+
+/* Allocate an uninitialized buffer large enough for one L1 or L2 table */
+QEDTable *qed_alloc_table(BDRVQEDState *s)
+{
+    const QEDHeader *hdr = &s->header;
+
+    /* qemu_blockalign() honors O_DIRECT memory alignment requirements */
+    return qemu_blockalign(s->bs, hdr->cluster_size * hdr->table_size);
+}
+
+/**
+ * Create a zero-filled L2 table in a new cache entry
+ *
+ * Space for the table is reserved at the end of the image file through
+ * qed_alloc_clusters(); nothing is written to the file here.
+ */
+static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
+{
+    CachedL2Table *fresh = qed_alloc_l2_cache_entry(&s->l2_cache);
+
+    fresh->table = qed_alloc_table(s);
+    fresh->offset = qed_alloc_clusters(s, s->header.table_size);
+
+    /* Every entry starts out as 0 */
+    memset(fresh->table->offsets, 0,
+           s->header.cluster_size * s->header.table_size);
+    return fresh;
+}
+
+static void qed_aio_next_io(void *opaque, int ret);
+
+static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
+{
+    assert(!s->allocating_write_reqs_plugged);  /* must not be plugged twice */
+
+    s->allocating_write_reqs_plugged = true;    /* cleared again by unplug */
+}
+
+/*
+ * Clear the plugged flag and, if an allocating write is queued, kick the
+ * first one with qed_aio_next_io().
+ */
+static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+{
+    QEDAIOCB *head;
+
+    assert(s->allocating_write_reqs_plugged);
+    s->allocating_write_reqs_plugged = false;
+
+    head = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+    if (head != NULL) {
+        qed_aio_next_io(head, 0);
+    }
+}
+
+static void qed_finish_clear_need_check(void *opaque, int ret)
+{
+    /* Do nothing: this only serves as the completion function of the flush
+     * issued by qed_flush_after_clear_need_check(); @ret is ignored.
+     */
+}
+
+static void qed_flush_after_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;   /* @ret of the header write is not examined */
+
+    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
+
+    /* No need to wait until flush completes: allocating writes are released
+     * as soon as the flush has been issued.
+     */
+    qed_unplug_allocating_write_reqs(s);
+}
+
+/*
+ * Completion of the flush started by qed_need_check_timer_cb().  On success
+ * QED_F_NEED_CHECK is cleared and the header write is started; on failure
+ * the flag stays set and allocating writes are unplugged again.
+ */
+static void qed_clear_need_check(void *opaque, int ret)
+{
+    BDRVQEDState *s = opaque;
+
+    if (ret == 0) {
+        s->header.features &= ~QED_F_NEED_CHECK;
+        qed_write_header(s, qed_flush_after_clear_need_check, s);
+    } else {
+        qed_unplug_allocating_write_reqs(s);
+    }
+}
+
+static void qed_need_check_timer_cb(void *opaque)
+{
+    BDRVQEDState *s = opaque;
+
+    /* The timer should only fire when allocating writes have drained */
+    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
+
+    trace_qed_need_check_timer_cb(s);
+
+    qed_plug_allocating_write_reqs(s);  /* unplugged by the later callbacks */
+
+    /* Ensure writes are on disk before clearing flag; the flag itself is
+     * cleared in qed_clear_need_check() once this flush has completed.
+     */
+    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
+/* Arm the need-check timer to fire QED_NEED_CHECK_TIMEOUT seconds from now */
+static void qed_start_need_check_timer(BDRVQEDState *s)
+{
+    int64_t delay;
+
+    trace_qed_start_need_check_timer(s);
+
+    /* vm_clock is used so that the image file is not altered while the
+     * guest is suspended for migration.
+     */
+    delay = get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT;
+    qemu_mod_timer(s->need_check_timer, qemu_get_clock_ns(vm_clock) + delay);
+}
+
+/* Stop the need-check timer.
+ *
+ * It's okay to call this multiple times or when no timer is started
+ */
+static void qed_cancel_need_check_timer(BDRVQEDState *s)
+{
+    trace_qed_cancel_need_check_timer(s);
+    qemu_del_timer(s->need_check_timer);
+}
+
+/* Point the QED state stored in bs->opaque back at @bs */
+static void bdrv_qed_rebind(BlockDriverState *bs)
+{
+    BDRVQEDState *state = bs->opaque;
+
+    state->bs = bs;
+}
+
+static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags)
+{
+    BDRVQEDState *s = bs->opaque;
+    QEDHeader le_header;
+    int64_t file_size;
+    int ret;
+    /*
+     * Open sequence: read and validate the header, derive the table
+     * geometry, pick up the backing file name, knock out unknown autoclear
+     * bits, load the L1 table, run a repairing check if the image was left
+     * dirty, and finally create the need-check timer.  Returns 0 on success
+     * and a negative errno on failure.
+     */
+    s->bs = bs;
+    QSIMPLEQ_INIT(&s->allocating_write_reqs);
+
+    ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+    if (ret < 0) {
+        return ret;
+    }
+    qed_header_le_to_cpu(&le_header, &s->header);
+
+    if (s->header.magic != QED_MAGIC) {
+        return -EMEDIUMTYPE;
+    }
+    if (s->header.features & ~QED_FEATURE_MASK) {
+        /* image uses unsupported feature bits */
+        char buf[64];
+        snprintf(buf, sizeof(buf), "%" PRIx64,
+            s->header.features & ~QED_FEATURE_MASK);
+        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
+            bs->device_name, "QED", buf);
+        return -ENOTSUP;
+    }
+    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
+        return -EINVAL;
+    }
+
+    /* Round down file size to the last cluster */
+    file_size = bdrv_getlength(bs->file);
+    if (file_size < 0) {
+        return file_size;
+    }
+    s->file_size = qed_start_of_cluster(s, file_size);
+
+    if (!qed_is_table_size_valid(s->header.table_size)) {
+        return -EINVAL;
+    }
+    if (!qed_is_image_size_valid(s->header.image_size,
+                                 s->header.cluster_size,
+                                 s->header.table_size)) {
+        return -EINVAL;
+    }
+    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
+        return -EINVAL;
+    }
+
+    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
+                      sizeof(uint64_t);         /* 64-bit offsets per table */
+    s->l2_shift = ffs(s->header.cluster_size) - 1;  /* log2(cluster_size) */
+    s->l2_mask = s->table_nelems - 1;
+    s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
+
+    if ((s->header.features & QED_F_BACKING_FILE)) {
+        if ((uint64_t)s->header.backing_filename_offset +
+            s->header.backing_filename_size >
+            s->header.cluster_size * s->header.header_size) {
+            return -EINVAL; /* name would extend past the header clusters */
+        }
+
+        ret = qed_read_string(bs->file, s->header.backing_filename_offset,
+                              s->header.backing_filename_size, bs->backing_file,
+                              sizeof(bs->backing_file));
+        if (ret < 0) {
+            return ret;
+        }
+
+        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
+            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
+        }
+    }
+
+    /* Reset unknown autoclear feature bits.  This is a backwards
+     * compatibility mechanism that allows images to be opened by older
+     * programs, which "knock out" unknown feature bits.  When an image is
+     * opened by a newer program again it can detect that the autoclear
+     * feature is no longer valid.
+     *
+     * Skipped for read-only files and when BDRV_O_INCOMING is set.
+     */
+    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
+        !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
+
+        ret = qed_write_header_sync(s);
+        if (ret) {
+            return ret;
+        }
+
+        /* From here on only known autoclear feature bits are valid */
+        bdrv_flush(bs->file);
+    }
+
+    s->l1_table = qed_alloc_table(s);   /* released at out: on failure */
+    qed_init_l2_cache(&s->l2_cache);
+
+    ret = qed_read_l1_table_sync(s);
+    if (ret) {
+        goto out;
+    }
+
+    /* If image was not closed cleanly, check consistency */
+    if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
+        /* Read-only images cannot be fixed.  There is no risk of corruption
+         * since write operations are not possible.  Therefore, allow
+         * potentially inconsistent images to be opened read-only.  This can
+         * aid data recovery from an otherwise inconsistent image.
+         */
+        if (!bdrv_is_read_only(bs->file) &&
+            !(flags & BDRV_O_INCOMING)) {
+            BdrvCheckResult result = {0};
+
+            ret = qed_check(s, &result, true);
+            if (ret) {
+                goto out;
+            }
+        }
+    }
+
+    s->need_check_timer = qemu_new_timer_ns(vm_clock,
+                                            qed_need_check_timer_cb, s);
+
+out:
+    if (ret) {  /* the success path falls through here with ret == 0 */
+        qed_free_l2_cache(&s->l2_cache);
+        qemu_vfree(s->l1_table);
+    }
+    return ret;
+}
+
+/* QED has nothing to prepare for a reopen, so this stub ignores all of its
+ * arguments and always returns 0 (success). */
+static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+/**
+ * Close a QED image
+ *
+ * Cancels and frees the need-check timer, flushes the image file and, if the
+ * QED_F_NEED_CHECK bit is set, clears it and rewrites the header. Finally
+ * releases the L2 cache and the L1 table. The results of the flush and of the
+ * header write are not checked.
+ */
+static void bdrv_qed_close(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ qed_cancel_need_check_timer(s);
+ qemu_free_timer(s->need_check_timer);
+
+ /* Ensure writes reach stable storage */
+ bdrv_flush(bs->file);
+
+ /* Clean shutdown, no check required on next open */
+ if (s->header.features & QED_F_NEED_CHECK) {
+ s->header.features &= ~QED_F_NEED_CHECK;
+ qed_write_header_sync(s);
+ }
+
+ qed_free_l2_cache(&s->l2_cache);
+ qemu_vfree(s->l1_table);
+}
+
+/**
+ * Create and initialize a new QED image file
+ *
+ * @filename:       Image file to create
+ * @cluster_size:   Cluster size in bytes
+ * @image_size:     Virtual disk size in bytes
+ * @table_size:     L1/L2 table size in clusters
+ * @backing_file:   Backing filename, or NULL for no backing file
+ * @backing_fmt:    Backing format name as passed by bdrv_qed_create()
+ *                  (NULL if the option was not given)
+ *
+ * Writes the header, the optional backing filename directly behind the
+ * header structure, and a zeroed L1 table at offset cluster_size.  The size
+ * parameters are not validated here; bdrv_qed_create() does that before
+ * calling.
+ *
+ * Returns 0 on success, -ENOSPC if the backing filename does not fit behind
+ * the header structure inside the header cluster, or a negative errno from
+ * the block layer.
+ */
+static int qed_create(const char *filename, uint32_t cluster_size,
+                      uint64_t image_size, uint32_t table_size,
+                      const char *backing_file, const char *backing_fmt)
+{
+    QEDHeader header = {
+        .magic = QED_MAGIC,
+        .cluster_size = cluster_size,
+        .table_size = table_size,
+        .header_size = 1,
+        .features = 0,
+        .compat_features = 0,
+        .l1_table_offset = cluster_size,
+        .image_size = image_size,
+    };
+    QEDHeader le_header;
+    uint8_t *l1_table = NULL;
+    size_t l1_size = header.cluster_size * header.table_size;
+    size_t backing_file_len = 0;
+    int ret = 0;
+    BlockDriverState *bs = NULL;
+
+    if (backing_file) {
+        backing_file_len = strlen(backing_file);
+
+        /* The header structure plus the filename must fit in the header
+         * area.  Otherwise the filename would run into the L1 table and the
+         * open path would reject the image with -EINVAL.  Same limit and
+         * error code as bdrv_qed_change_backing_file().
+         */
+        if (sizeof(le_header) + backing_file_len >
+            (uint64_t)header.header_size * header.cluster_size) {
+            return -ENOSPC;
+        }
+    }
+
+    ret = bdrv_create_file(filename, NULL);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* File must start empty and grow, check truncate is supported */
+    ret = bdrv_truncate(bs, 0);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (backing_file) {
+        header.features |= QED_F_BACKING_FILE;
+        header.backing_filename_offset = sizeof(le_header);
+        header.backing_filename_size = backing_file_len;
+
+        if (qed_fmt_is_raw(backing_fmt)) {
+            header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
+        }
+    }
+
+    qed_header_cpu_to_le(&header, &le_header);
+    ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* Only touch the filename area when there is a filename to store */
+    if (backing_file) {
+        ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
+                          header.backing_filename_size);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+
+    l1_table = g_malloc0(l1_size);
+    ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = 0; /* success */
+out:
+    g_free(l1_table);
+    bdrv_delete(bs);
+    return ret;
+}
+
+/**
+ * Create a QED image from a list of creation options
+ *
+ * @filename: Image file to create
+ * @options:  Option list terminated by an entry with a NULL name; may be NULL
+ *
+ * Picks up size, backing file, backing format, cluster size and table size
+ * from the list (zero cluster/table sizes keep the defaults), validates the
+ * geometry and hands over to qed_create().
+ *
+ * Returns -EINVAL (after printing a message to stderr) for an invalid
+ * geometry, otherwise the result of qed_create().
+ */
+static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options)
+{
+    const char *backing_file = NULL;
+    const char *backing_fmt = NULL;
+    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
+    uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
+    uint64_t image_size = 0;
+    QEMUOptionParameter *opt;
+
+    for (opt = options; opt && opt->name; opt++) {
+        const char *key = opt->name;
+
+        if (strcmp(key, BLOCK_OPT_SIZE) == 0) {
+            image_size = opt->value.n;
+        } else if (strcmp(key, BLOCK_OPT_BACKING_FILE) == 0) {
+            backing_file = opt->value.s;
+        } else if (strcmp(key, BLOCK_OPT_BACKING_FMT) == 0) {
+            backing_fmt = opt->value.s;
+        } else if (strcmp(key, BLOCK_OPT_CLUSTER_SIZE) == 0) {
+            /* Zero means "not specified": keep the default */
+            if (opt->value.n) {
+                cluster_size = opt->value.n;
+            }
+        } else if (strcmp(key, BLOCK_OPT_TABLE_SIZE) == 0) {
+            /* Zero means "not specified": keep the default */
+            if (opt->value.n) {
+                table_size = opt->value.n;
+            }
+        }
+    }
+
+    if (!qed_is_cluster_size_valid(cluster_size)) {
+        fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
+                QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
+        return -EINVAL;
+    }
+
+    if (!qed_is_table_size_valid(table_size)) {
+        fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
+                QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
+        return -EINVAL;
+    }
+
+    if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
+        fprintf(stderr, "QED image size must be a non-zero multiple of "
+                        "cluster size and less than %" PRIu64 " bytes\n",
+                qed_max_image_size(cluster_size, table_size));
+        return -EINVAL;
+    }
+
+    return qed_create(filename, cluster_size, image_size, table_size,
+                      backing_file, backing_fmt);
+}
+
+/* State shared between bdrv_qed_co_is_allocated() and its lookup callback */
+typedef struct {
+ Coroutine *co; /* coroutine to re-enter from the callback, if set */
+ int is_allocated; /* -1 until the callback has run, then 0 or 1 */
+ int *pnum; /* receives the length of the result in sectors */
+} QEDIsAllocatedCB;
+
+/**
+ * Cluster lookup callback for bdrv_qed_co_is_allocated()
+ *
+ * Stores the length (converted to sectors) and the allocation verdict in the
+ * QEDIsAllocatedCB passed as @opaque, then re-enters the waiting coroutine if
+ * one has been recorded.  Found and zero clusters count as allocated.
+ */
+static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
+{
+    QEDIsAllocatedCB *state = opaque;
+    int allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO);
+
+    *state->pnum = len / BDRV_SECTOR_SIZE;
+    state->is_allocated = allocated;
+
+    if (state->co != NULL) {
+        qemu_coroutine_enter(state->co, NULL);
+    }
+}
+
+/**
+ * Query whether the data at a sector position is allocated in this image
+ *
+ * @bs: Block device
+ * @sector_num: First sector to look up
+ * @nb_sectors: Number of sectors to look at
+ * @pnum: Set by the lookup callback to the length of the result in sectors
+ *
+ * Starts a cluster lookup and yields until the callback has delivered a
+ * result. Returns 1 if the lookup reported a found or zero cluster and 0 for
+ * any other lookup result.
+ */
+static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BDRVQEDState *s = bs->opaque;
+ uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
+ size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
+ QEDIsAllocatedCB cb = {
+ .is_allocated = -1,
+ .pnum = pnum,
+ };
+ QEDRequest request = { .l2_table = NULL };
+
+ qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb);
+
+ /* Now sleep if the callback wasn't invoked immediately */
+ while (cb.is_allocated == -1) {
+ /* cb.co is only set here, so an immediate callback never re-enters us */
+ cb.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+
+ qed_unref_l2_cache_entry(request.l2_table);
+
+ return cb.is_allocated;
+}
+
+/* Emptying an image is not implemented for QED: always returns -ENOTSUP */
+static int bdrv_qed_make_empty(BlockDriverState *bs)
+{
+ return -ENOTSUP;
+}
+
+/* Map a request to the QED state of the block device it was issued on */
+static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
+{
+    BlockDriverState *bs = acb->common.bs;
+
+    return bs->opaque;
+}
+
+/**
+ * Read from the backing file or zero-fill if no backing file
+ *
+ * @s: QED state
+ * @pos: Byte position in device
+ * @qiov: Destination I/O vector
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ *
+ * This function reads qiov->size bytes starting at pos from the backing file.
+ * If there is no backing file then zeroes are read.
+ *
+ * The completion function is called directly from here when the backing file
+ * length cannot be determined (with the negative length as error) or when
+ * pos lies at or beyond the end of the backing file (with 0). Otherwise it is
+ * passed on to the asynchronous read.
+ */
+static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
+ QEMUIOVector *qiov,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ uint64_t backing_length = 0;
+ size_t size;
+
+ /* If there is a backing file, get its length. Treat the absence of a
+ * backing file like a zero length backing file.
+ */
+ if (s->bs->backing_hd) {
+ int64_t l = bdrv_getlength(s->bs->backing_hd);
+ if (l < 0) {
+ cb(opaque, l);
+ return;
+ }
+ backing_length = l;
+ }
+
+ /* Zero all sectors if reading beyond the end of the backing file */
+ if (pos >= backing_length ||
+ pos + qiov->size > backing_length) {
+ /* The whole vector is cleared; a shortened read below then overwrites
+ * only the part that the backing file covers.
+ */
+ qemu_iovec_memset(qiov, 0, 0, qiov->size);
+ }
+
+ /* Complete now if there are no backing file sectors to read */
+ if (pos >= backing_length) {
+ cb(opaque, 0);
+ return;
+ }
+
+ /* If the read straddles the end of the backing file, shorten it */
+ size = MIN((uint64_t)backing_length - pos, qiov->size);
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
+ bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+ qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+}
+
+/* State of one copy from the backing file into the image file */
+typedef struct {
+ GenericCB gencb; /* carries the caller's completion function */
+ BDRVQEDState *s;
+ QEMUIOVector qiov; /* single-element vector wrapping iov */
+ struct iovec iov; /* bounce buffer, freed when the copy completes */
+ uint64_t offset; /* byte offset in the image file to write to */
+} CopyFromBackingFileCB;
+
+/* Final step of a backing file copy: release the bounce buffer and report
+ * the result through the generic callback.
+ */
+static void qed_copy_from_backing_file_cb(void *opaque, int ret)
+{
+    CopyFromBackingFileCB *ctx = opaque;
+    void *bounce = ctx->iov.iov_base;
+
+    qemu_vfree(bounce);
+    gencb_complete(&ctx->gencb, ret);
+}
+
+/* Second step of a backing file copy: once the read has finished, write the
+ * buffer to the image file.  A read error ends the copy immediately.
+ */
+static void qed_copy_from_backing_file_write(void *opaque, int ret)
+{
+    CopyFromBackingFileCB *ctx = opaque;
+    BDRVQEDState *s = ctx->s;
+
+    if (ret != 0) {
+        qed_copy_from_backing_file_cb(ctx, ret);
+        return;
+    }
+
+    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
+    bdrv_aio_writev(s->bs->file,
+                    ctx->offset / BDRV_SECTOR_SIZE,
+                    &ctx->qiov,
+                    ctx->qiov.size / BDRV_SECTOR_SIZE,
+                    qed_copy_from_backing_file_cb,
+                    ctx);
+}
+
+/**
+ * Copy data from backing file into the image
+ *
+ * @s: QED state
+ * @pos: Byte position in device
+ * @len: Number of bytes
+ * @offset: Byte offset in image file
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ *
+ * A zero @len completes immediately by calling @cb with 0. Otherwise a
+ * bounce buffer of @len bytes is allocated, filled by
+ * qed_read_backing_file() and then written out by
+ * qed_copy_from_backing_file_write().
+ */
+static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
+ uint64_t len, uint64_t offset,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ CopyFromBackingFileCB *copy_cb;
+
+ /* Skip copy entirely if there is no work to do */
+ if (len == 0) {
+ cb(opaque, 0);
+ return;
+ }
+
+ copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
+ copy_cb->s = s;
+ copy_cb->offset = offset;
+ copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
+ copy_cb->iov.iov_len = len;
+ qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
+
+ qed_read_backing_file(s, pos, &copy_cb->qiov,
+ qed_copy_from_backing_file_write, copy_cb);
+}
+
+/**
+ * Link one or more contiguous clusters into a table
+ *
+ * @s:          QED state
+ * @table:      L2 table
+ * @index:      First cluster index
+ * @n:          Number of contiguous clusters
+ * @cluster:    First cluster offset
+ *
+ * @cluster is either a byte offset in the image file, the zero cluster
+ * marker, or the unallocated cluster marker.  Real offsets advance by one
+ * cluster per entry; a marker is stored unchanged in every entry.
+ */
+static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
+                                unsigned int n, uint64_t cluster)
+{
+    int slot;
+
+    for (slot = index; slot < index + n; slot++) {
+        table->offsets[slot] = cluster;
+
+        /* Markers repeat, only allocated offsets move forward */
+        if (qed_offset_is_unalloc_cluster(cluster) ||
+            qed_offset_is_zero_cluster(cluster)) {
+            continue;
+        }
+        cluster += s->header.cluster_size;
+    }
+}
+
+/**
+ * Bottom half that delivers the completion of a request
+ *
+ * Everything needed from the request is copied to locals first because the
+ * request is released before the user callback is invoked.
+ */
+static void qed_aio_complete_bh(void *opaque)
+{
+ QEDAIOCB *acb = opaque;
+ BlockDriverCompletionFunc *cb = acb->common.cb;
+ void *user_opaque = acb->common.opaque;
+ int ret = acb->bh_ret;
+ bool *finished = acb->finished;
+
+ qemu_bh_delete(acb->bh);
+ qemu_aio_release(acb);
+
+ /* Invoke callback */
+ cb(user_opaque, ret);
+
+ /* Signal cancel completion */
+ if (finished) {
+ *finished = true;
+ }
+}
+
+/**
+ * Finish a request
+ *
+ * @acb: Request
+ * @ret: Result to report to the request's completion function
+ *
+ * Releases the per-request resources and schedules a bottom half
+ * (qed_aio_complete_bh) that invokes the completion function. If the request
+ * is at the head of the allocating write queue it is removed and the next
+ * queued request, if any, is resumed.
+ */
+static void qed_aio_complete(QEDAIOCB *acb, int ret)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+
+ trace_qed_aio_complete(s, acb, ret);
+
+ /* Free resources */
+ qemu_iovec_destroy(&acb->cur_qiov);
+ qed_unref_l2_cache_entry(acb->request.l2_table);
+
+ /* Free the buffer we may have allocated for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ qemu_vfree(acb->qiov->iov[0].iov_base);
+ acb->qiov->iov[0].iov_base = NULL;
+ }
+
+ /* Arrange for a bh to invoke the completion function */
+ acb->bh_ret = ret;
+ acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+ qemu_bh_schedule(acb->bh);
+
+ /* Start next allocating write request waiting behind this one. Note that
+ * requests enqueue themselves when they first hit an unallocated cluster
+ * but they wait until the entire request is finished before waking up the
+ * next request in the queue. This ensures that we don't cycle through
+ * requests multiple times but rather finish one at a time completely.
+ */
+ if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+ QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
+ acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
+ if (acb) {
+ qed_aio_next_io(acb, 0);
+ } else if (s->header.features & QED_F_NEED_CHECK) {
+ /* Queue drained while the need-check bit is set: start the timer */
+ qed_start_need_check_timer(s);
+ }
+ }
+}
+
+/**
+ * Commit the current L2 table to the cache
+ *
+ * Completion callback of the L1 table write. The request's L2 table is
+ * committed to the cache and then looked up again by its offset, which
+ * replaces the request's reference. Afterwards the request continues in
+ * qed_aio_next_io() with the result of the L1 write.
+ */
+static void qed_commit_l2_update(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ CachedL2Table *l2_table = acb->request.l2_table;
+ uint64_t l2_offset = l2_table->offset;
+
+ qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
+
+ /* This is guaranteed to succeed because we just committed the entry to the
+ * cache.
+ */
+ acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
+ assert(acb->request.l2_table != NULL);
+
+ qed_aio_next_io(opaque, ret);
+}
+
+/**
+ * Update L1 table with new L2 table offset and write it out
+ *
+ * Completion callback of the L2 table write.  On error the request is
+ * completed with that error; otherwise the L1 entry covering the current
+ * position is pointed at the request's L2 table and that single entry is
+ * written out, continuing in qed_commit_l2_update().
+ */
+static void qed_aio_write_l1_update(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s;
+    int l1_index;
+
+    if (ret != 0) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    s = acb_to_s(acb);
+    l1_index = qed_l1_index(s, acb->cur_pos);
+    s->l1_table->offsets[l1_index] = acb->request.l2_table->offset;
+
+    qed_write_l1_table(s, l1_index, 1, qed_commit_l2_update, acb);
+}
+
+/**
+ * Update L2 table with new cluster offsets and write them out
+ *
+ * @acb:    Write request
+ * @ret:    Result of the preceding step; non-zero completes the request
+ * @offset: First cluster offset (or marker) to store in the L2 table
+ *
+ * When the cluster lookup ended with QED_CLUSTER_L1 the request's L2 table is
+ * replaced by a newly created one, which is written out completely and then
+ * linked into the L1 table.  Otherwise only the touched entries are written.
+ */
+static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
+{
+    BDRVQEDState *s = acb_to_s(acb);
+    bool new_l2 = (acb->find_cluster_ret == QED_CLUSTER_L1);
+    int l2_index;
+
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    if (new_l2) {
+        qed_unref_l2_cache_entry(acb->request.l2_table);
+        acb->request.l2_table = qed_new_l2_table(s);
+    }
+
+    l2_index = qed_l2_index(s, acb->cur_pos);
+    qed_update_l2_table(s, acb->request.l2_table->table, l2_index,
+                        acb->cur_nclusters, offset);
+
+    if (!new_l2) {
+        /* Write out only the updated part of the L2 table */
+        qed_write_l2_table(s, &acb->request, l2_index, acb->cur_nclusters,
+                           false, qed_aio_next_io, acb);
+        return;
+    }
+
+    /* Write out the whole new L2 table */
+    qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
+                       qed_aio_write_l1_update, acb);
+}
+
+/* Completion-callback adapter for qed_aio_write_l2_update(): supplies the
+ * request's current cluster offset as the offset to link.
+ */
+static void qed_aio_write_l2_update_cb(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+}
+
+/**
+ * Flush new data clusters before updating the L2 table
+ *
+ * This flush is necessary when a backing file is in use.  A crash during an
+ * allocating write could result in empty clusters in the image.  If the write
+ * only touched a subregion of the cluster, then backing image sectors have
+ * been lost in the untouched region.  The solution is to flush after writing a
+ * new data cluster and before updating the L2 table.
+ *
+ * @opaque: Write request
+ * @ret:    Result of the data write that precedes the flush
+ *
+ * A failed data write completes the request with that error; the cluster
+ * must not be linked into the L2 table in that case.
+ */
+static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+
+    /* Do not flush and link a cluster whose data never made it to disk */
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+        qed_aio_complete(acb, -EIO);
+    }
+}
+
+/**
+ * Write data to the image file
+ *
+ * @opaque: Write request
+ * @ret: Result of the preceding step; non-zero completes the request
+ *
+ * Writes the current I/O vector at the current cluster offset plus the
+ * offset of the current position within its cluster. The follow-up step
+ * depends on the cluster lookup result: a found cluster continues with the
+ * next I/O, anything else goes on to the L2 table update, preceded by a flush
+ * when a backing file is in use.
+ */
+static void qed_aio_write_main(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t offset = acb->cur_cluster +
+ qed_offset_into_cluster(s, acb->cur_pos);
+ BlockDriverCompletionFunc *next_fn;
+
+ trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
+ next_fn = qed_aio_next_io;
+ } else {
+ if (s->bs->backing_hd) {
+ next_fn = qed_aio_write_flush_before_l2_update;
+ } else {
+ next_fn = qed_aio_write_l2_update_cb;
+ }
+ }
+
+ BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
+ bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+ next_fn, acb);
+}
+
+/**
+ * Populate back untouched region of new data cluster
+ *
+ * @opaque: Write request
+ * @ret: Result of the preceding step; non-zero completes the request
+ *
+ * Copies the range from the end of the current write up to the next cluster
+ * boundary from the backing file, then continues with qed_aio_write_main().
+ */
+static void qed_aio_write_postfill(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ uint64_t start = acb->cur_pos + acb->cur_qiov.size;
+ /* Distance from start to the next cluster boundary (0 if start is aligned) */
+ uint64_t len =
+ qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
+ uint64_t offset = acb->cur_cluster +
+ qed_offset_into_cluster(s, acb->cur_pos) +
+ acb->cur_qiov.size;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ trace_qed_aio_write_postfill(s, acb, start, len, offset);
+ qed_copy_from_backing_file(s, start, len, offset,
+ qed_aio_write_main, acb);
+}
+
+/**
+ * Populate front untouched region of new data cluster
+ *
+ * @opaque: Write request
+ * @ret:    Result of the preceding step (the header write that sets
+ *          QED_F_NEED_CHECK, or 0 when called directly)
+ *
+ * Copies the range from the start of the cluster up to the current position
+ * from the backing file, then continues with qed_aio_write_postfill().  A
+ * failed preceding step completes the request with that error, as
+ * qed_aio_write_zero_cluster() does on the zero write path.
+ */
+static void qed_aio_write_prefill(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+    BDRVQEDState *s = acb_to_s(acb);
+    uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
+    uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
+
+    /* Do not start allocating if the previous step failed */
+    if (ret) {
+        qed_aio_complete(acb, ret);
+        return;
+    }
+
+    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
+    qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
+                                qed_aio_write_postfill, acb);
+}
+
+/**
+ * Check if the QED_F_NEED_CHECK bit should be set during allocating write
+ *
+ * Returns true only when there is no backing file and the bit is not set
+ * yet.
+ */
+static bool qed_should_set_need_check(BDRVQEDState *s)
+{
+    bool already_set = (s->header.features & QED_F_NEED_CHECK) != 0;
+
+    /* The flush before L2 update path ensures consistency */
+    return !s->bs->backing_hd && !already_set;
+}
+
+/* Zero write step: on success store the value 1 in the L2 entries of the
+ * request (presumably the zero cluster marker -- confirm against
+ * qed_offset_is_zero_cluster()), otherwise complete with the error.
+ */
+static void qed_aio_write_zero_cluster(void *opaque, int ret)
+{
+    QEDAIOCB *acb = opaque;
+
+    if (ret == 0) {
+        qed_aio_write_l2_update(acb, 0, 1);
+        return;
+    }
+
+    qed_aio_complete(acb, ret);
+}
+
+/**
+ * Write new data cluster
+ *
+ * @acb: Write request
+ * @len: Length in bytes
+ *
+ * This path is taken when writing to previously unallocated clusters.
+ *
+ * Allocating writes are serialized through s->allocating_write_reqs: the
+ * request is appended to the queue unless it is already at its head, and it
+ * only proceeds when it is the head and the queue is not plugged.
+ */
+static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
+{
+ BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverCompletionFunc *cb;
+
+ /* Cancel timer when the first allocating request comes in */
+ if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
+ qed_cancel_need_check_timer(s);
+ }
+
+ /* Freeze this request if another allocating write is in progress */
+ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
+ QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
+ }
+ if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
+ s->allocating_write_reqs_plugged) {
+ return; /* wait for existing request to finish */
+ }
+
+ acb->cur_nclusters = qed_bytes_to_clusters(s,
+ qed_offset_into_cluster(s, acb->cur_pos) + len);
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ if (acb->flags & QED_AIOCB_ZERO) {
+ /* Skip ahead if the clusters are already zero */
+ if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
+ qed_aio_next_io(acb, 0);
+ return;
+ }
+
+ cb = qed_aio_write_zero_cluster;
+ } else {
+ cb = qed_aio_write_prefill;
+ acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
+ }
+
+ /* Set the need-check bit in the header first when required, then go on */
+ if (qed_should_set_need_check(s)) {
+ s->header.features |= QED_F_NEED_CHECK;
+ qed_write_header(s, cb, acb);
+ } else {
+ cb(acb, 0);
+ }
+}
+
+/**
+ * Write data cluster in place
+ *
+ * @acb: Write request
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * This path is taken when writing to already allocated clusters.
+ *
+ * Zero write requests arrive without a data buffer; one is allocated and
+ * cleared here on first use. It is freed again in qed_aio_complete().
+ */
+static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
+{
+ /* Allocate buffer for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ struct iovec *iov = acb->qiov->iov;
+
+ if (!iov->iov_base) {
+ iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
+ memset(iov->iov_base, 0, iov->iov_len);
+ }
+ }
+
+ /* Calculate the I/O vector */
+ acb->cur_cluster = offset;
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ /* Do the actual write */
+ qed_aio_write_main(acb, 0);
+}
+
+/**
+ * Write data cluster
+ *
+ * @opaque:     Write request
+ * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ *              QED_CLUSTER_ZERO, or -errno
+ * @offset:     Cluster offset in bytes
+ * @len:        Length in bytes
+ *
+ * Callback from qed_find_cluster().  Records the lookup result in the
+ * request and dispatches to the in-place or the allocating write path; any
+ * other value completes the request with @ret.
+ */
+static void qed_aio_write_data(void *opaque, int ret,
+                               uint64_t offset, size_t len)
+{
+    QEDAIOCB *acb = opaque;
+
+    trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
+
+    acb->find_cluster_ret = ret;
+
+    if (ret == QED_CLUSTER_FOUND) {
+        qed_aio_write_inplace(acb, offset, len);
+    } else if (ret == QED_CLUSTER_L2 ||
+               ret == QED_CLUSTER_L1 ||
+               ret == QED_CLUSTER_ZERO) {
+        qed_aio_write_alloc(acb, len);
+    } else {
+        qed_aio_complete(acb, ret);
+    }
+}
+
+/**
+ * Read data cluster
+ *
+ * @opaque: Read request
+ * @ret: QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
+ * QED_CLUSTER_ZERO, or -errno
+ * @offset: Cluster offset in bytes
+ * @len: Length in bytes
+ *
+ * Callback from qed_find_cluster().
+ *
+ * Zero clusters are satisfied by clearing the buffer, found clusters are
+ * read from the image file, and every other non-negative result is read via
+ * qed_read_backing_file(). A negative @ret completes the request with it.
+ */
+static void qed_aio_read_data(void *opaque, int ret,
+ uint64_t offset, size_t len)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverState *bs = acb->common.bs;
+
+ /* Adjust offset into cluster */
+ offset += qed_offset_into_cluster(s, acb->cur_pos);
+
+ trace_qed_aio_read_data(s, acb, ret, offset, len);
+
+ if (ret < 0) {
+ goto err;
+ }
+
+ qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+
+ /* Handle zero cluster and backing file reads */
+ if (ret == QED_CLUSTER_ZERO) {
+ qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
+ qed_aio_next_io(acb, 0);
+ return;
+ } else if (ret != QED_CLUSTER_FOUND) {
+ qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
+ qed_aio_next_io, acb);
+ return;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+ &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
+ qed_aio_next_io, acb);
+ return;
+
+err:
+ qed_aio_complete(acb, ret);
+}
+
+/**
+ * Begin next I/O or complete the request
+ *
+ * @opaque: Request
+ * @ret: Result of the step that just finished; non-zero completes the request
+ *
+ * Advances the request by the size of the I/O vector that was just
+ * processed. Once the end position is reached the request completes with 0;
+ * otherwise the next cluster is looked up and handed to the read or write
+ * data callback, depending on QED_AIOCB_WRITE.
+ */
+static void qed_aio_next_io(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ BDRVQEDState *s = acb_to_s(acb);
+ QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
+ qed_aio_write_data : qed_aio_read_data;
+
+ trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
+
+ /* Handle I/O error */
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ acb->qiov_offset += acb->cur_qiov.size;
+ acb->cur_pos += acb->cur_qiov.size;
+ qemu_iovec_reset(&acb->cur_qiov);
+
+ /* Complete request */
+ if (acb->cur_pos >= acb->end_pos) {
+ qed_aio_complete(acb, 0);
+ return;
+ }
+
+ /* Find next cluster and start I/O */
+ qed_find_cluster(s, &acb->request,
+ acb->cur_pos, acb->end_pos - acb->cur_pos,
+ io_fn, acb);
+}
+
+/**
+ * Set up a read or write request and start it
+ *
+ * @bs: Block device
+ * @sector_num: First sector
+ * @qiov: Caller's I/O vector
+ * @nb_sectors: Number of sectors
+ * @cb: Completion function
+ * @opaque: User data for completion function
+ * @flags: QED_AIOCB_* flags (0 for a plain read)
+ *
+ * Initializes the request to cover the byte range of the given sectors and
+ * kicks it off through qed_aio_next_io(). Returns the common part of the new
+ * request.
+ */
+static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, int flags)
+{
+ QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
+
+ trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
+ opaque, flags);
+
+ acb->flags = flags;
+ acb->finished = NULL;
+ acb->qiov = qiov;
+ acb->qiov_offset = 0;
+ acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
+ acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
+ acb->request.l2_table = NULL;
+ qemu_iovec_init(&acb->cur_qiov, qiov->niov);
+
+ /* Start request */
+ qed_aio_next_io(acb, 0);
+ return &acb->common;
+}
+
+/* Driver read entry point: starts a request with no flags set */
+static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+/* Driver write entry point: starts a request flagged QED_AIOCB_WRITE */
+static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, QED_AIOCB_WRITE);
+}
+
+/* State shared between bdrv_qed_co_write_zeroes() and its completion
+ * callback
+ */
+typedef struct {
+ Coroutine *co; /* coroutine to re-enter on completion, if set */
+ int ret; /* result of the request, valid once done is true */
+ bool done; /* set by the completion callback */
+} QEDWriteZeroesCB;
+
+/* Completion callback of a zero write: record the result, mark the request
+ * as done and re-enter the waiting coroutine if one has been recorded.
+ */
+static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
+{
+    QEDWriteZeroesCB *state = opaque;
+
+    state->ret = ret;
+    state->done = true;
+
+    if (state->co != NULL) {
+        qemu_coroutine_enter(state->co, NULL);
+    }
+}
+
+/**
+ * Write zeroes to a range of sectors
+ *
+ * @bs:         Block device
+ * @sector_num: First sector
+ * @nb_sectors: Number of sectors
+ *
+ * With a backing file only cluster-aligned requests are accepted; anything
+ * else returns -ENOTSUP.  The request is issued as a write flagged
+ * QED_AIOCB_ZERO and the coroutine yields until it has completed.
+ *
+ * Returns the result of the request, -ENOTSUP as described above, or -EIO if
+ * no request could be set up.
+ */
+static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
+                                                 int64_t sector_num,
+                                                 int nb_sectors)
+{
+    BlockDriverAIOCB *blockacb;
+    BDRVQEDState *s = bs->opaque;
+    QEDWriteZeroesCB cb = { .done = false };
+    QEMUIOVector qiov;
+    struct iovec iov;
+
+    /* Refuse if there are untouched backing file sectors */
+    if (bs->backing_hd) {
+        if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
+            return -ENOTSUP;
+        }
+        if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
+            return -ENOTSUP;
+        }
+    }
+
+    /* Zero writes start without an I/O buffer.  If a buffer becomes necessary
+     * then it will be allocated during request processing.
+     */
+    iov.iov_base = NULL;
+    iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+    blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
+                             qed_co_write_zeroes_cb, &cb,
+                             QED_AIOCB_WRITE | QED_AIOCB_ZERO);
+    if (!blockacb) {
+        return -EIO;
+    }
+
+    /* Sleep unless the request completed before we got here */
+    if (!cb.done) {
+        cb.co = qemu_coroutine_self();
+        qemu_coroutine_yield();
+    }
+    assert(cb.done);
+    return cb.ret;
+}
+
+/**
+ * Change the virtual size of the image
+ *
+ * @bs:     Block device
+ * @offset: New image size in bytes
+ *
+ * Only growing is supported.  The new size is stored in the header and
+ * written out synchronously; if that fails the old size is restored in
+ * memory.
+ *
+ * Returns -EINVAL for a size that is not valid for the image geometry,
+ * -ENOTSUP for a shrink, otherwise the result of the header write.
+ */
+static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BDRVQEDState *s = bs->opaque;
+    const uint64_t prev_size = s->header.image_size;
+    int ret;
+
+    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
+                                 s->header.table_size)) {
+        return -EINVAL;
+    }
+
+    /* Shrinking is currently not supported */
+    if ((uint64_t)offset < prev_size) {
+        return -ENOTSUP;
+    }
+
+    s->header.image_size = offset;
+    ret = qed_write_header_sync(s);
+    if (ret < 0) {
+        /* Roll back so memory keeps matching what is on disk */
+        s->header.image_size = prev_size;
+    }
+
+    return ret;
+}
+
+/* Return the image size recorded in the in-memory header */
+static int64_t bdrv_qed_getlength(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+ return s->header.image_size;
+}
+
+/* Fill in @bdi: everything is zeroed, then the cluster size is copied from
+ * the header and is_dirty reflects the QED_F_NEED_CHECK bit. Always
+ * returns 0.
+ */
+static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ memset(bdi, 0, sizeof(*bdi));
+ bdi->cluster_size = s->header.cluster_size;
+ bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
+ return 0;
+}
+
+/**
+ * Change or remove the backing file recorded in the header
+ *
+ * @bs: Block device
+ * @backing_file: New backing filename, or NULL to remove it
+ * @backing_fmt: Format of the new backing file
+ *
+ * Builds a new header with the backing filename stored directly behind the
+ * header structure and writes both out synchronously. The in-memory header
+ * is replaced only when the write returned 0.
+ *
+ * Returns -ENOTSUP if a filename is given while unknown compat feature bits
+ * are set, -ENOSPC if header plus filename do not fit in the header area,
+ * otherwise the result of the write.
+ */
+static int bdrv_qed_change_backing_file(BlockDriverState *bs,
+ const char *backing_file,
+ const char *backing_fmt)
+{
+ BDRVQEDState *s = bs->opaque;
+ QEDHeader new_header, le_header;
+ void *buffer;
+ size_t buffer_len, backing_file_len;
+ int ret;
+
+ /* Refuse to set backing filename if unknown compat feature bits are
+ * active. If the image uses an unknown compat feature then we may not
+ * know the layout of data following the header structure and cannot safely
+ * add a new string.
+ */
+ if (backing_file && (s->header.compat_features &
+ ~QED_COMPAT_FEATURE_MASK)) {
+ return -ENOTSUP;
+ }
+
+ memcpy(&new_header, &s->header, sizeof(new_header));
+
+ new_header.features &= ~(QED_F_BACKING_FILE |
+ QED_F_BACKING_FORMAT_NO_PROBE);
+
+ /* Adjust feature flags */
+ if (backing_file) {
+ new_header.features |= QED_F_BACKING_FILE;
+
+ if (qed_fmt_is_raw(backing_fmt)) {
+ new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
+ }
+ }
+
+ /* Calculate new header size */
+ backing_file_len = 0;
+
+ if (backing_file) {
+ backing_file_len = strlen(backing_file);
+ }
+
+ buffer_len = sizeof(new_header);
+ new_header.backing_filename_offset = buffer_len;
+ new_header.backing_filename_size = backing_file_len;
+ buffer_len += backing_file_len;
+
+ /* Make sure we can rewrite header without failing */
+ if (buffer_len > new_header.header_size * new_header.cluster_size) {
+ return -ENOSPC;
+ }
+
+ /* Prepare new header */
+ buffer = g_malloc(buffer_len);
+
+ qed_header_cpu_to_le(&new_header, &le_header);
+ memcpy(buffer, &le_header, sizeof(le_header));
+ buffer_len = sizeof(le_header);
+
+ if (backing_file) {
+ memcpy(buffer + buffer_len, backing_file, backing_file_len);
+ buffer_len += backing_file_len;
+ }
+
+ /* Write new header */
+ ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+ g_free(buffer);
+ if (ret == 0) {
+ memcpy(&s->header, &new_header, sizeof(new_header));
+ }
+ return ret;
+}
+
+/* Drop all in-memory state by closing the image, clearing the driver state
+ * and opening it again with the flags it was opened with. The result of the
+ * reopen is not checked.
+ */
+static void bdrv_qed_invalidate_cache(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ bdrv_qed_close(bs);
+ memset(s, 0, sizeof(BDRVQEDState));
+ bdrv_qed_open(bs, NULL, bs->open_flags);
+}
+
+/* Driver consistency check entry point: forwards to qed_check(), passing
+ * true as the last argument for any non-zero @fix mode.
+ */
+static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ return qed_check(s, result, !!fix);
+}
+
+/* Image creation options of the QED driver. These are the option names that
+ * bdrv_qed_create() looks for. Only the cluster size carries a default
+ * value in this table.
+ */
+static QEMUOptionParameter qed_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size (in bytes)"
+ }, {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = OPT_STRING,
+ .help = "File name of a base image"
+ }, {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = OPT_STRING,
+ .help = "Image format of the base image"
+ }, {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = OPT_SIZE,
+ .help = "Cluster size (in bytes)",
+ .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
+ }, {
+ .name = BLOCK_OPT_TABLE_SIZE,
+ .type = OPT_SIZE,
+ .help = "L1/L2 table size (in clusters)"
+ },
+ { /* end of list */ }
+};
+
+/* Block driver description for the "qed" format; registered with the block
+ * layer by bdrv_qed_init().
+ */
+static BlockDriver bdrv_qed = {
+ .format_name = "qed",
+ .instance_size = sizeof(BDRVQEDState),
+ .create_options = qed_create_options,
+
+ .bdrv_probe = bdrv_qed_probe,
+ .bdrv_rebind = bdrv_qed_rebind,
+ .bdrv_open = bdrv_qed_open,
+ .bdrv_close = bdrv_qed_close,
+ .bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
+ .bdrv_create = bdrv_qed_create,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_is_allocated = bdrv_qed_co_is_allocated,
+ .bdrv_make_empty = bdrv_qed_make_empty,
+ .bdrv_aio_readv = bdrv_qed_aio_readv,
+ .bdrv_aio_writev = bdrv_qed_aio_writev,
+ .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
+ .bdrv_truncate = bdrv_qed_truncate,
+ .bdrv_getlength = bdrv_qed_getlength,
+ .bdrv_get_info = bdrv_qed_get_info,
+ .bdrv_change_backing_file = bdrv_qed_change_backing_file,
+ .bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
+ .bdrv_check = bdrv_qed_check,
+};
+
+/* Register the QED driver with the block layer */
+static void bdrv_qed_init(void)
+{
+ bdrv_register(&bdrv_qed);
+}
+
+/* NOTE(review): block_init() presumably arranges for bdrv_qed_init() to run
+ * during block layer start-up -- confirm against its definition.
+ */
+block_init(bdrv_qed_init);
diff --git a/contrib/qemu/block/qed.h b/contrib/qemu/block/qed.h
new file mode 100644
index 000000000..2b4ddedf3
--- /dev/null
+++ b/contrib/qemu/block/qed.h
@@ -0,0 +1,344 @@
+/*
+ * QEMU Enhanced Disk Format
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef BLOCK_QED_H
+#define BLOCK_QED_H
+
+#include "block/block_int.h"
+
+/* The layout of a QED file is as follows:
+ *
+ * +--------+----------+----------+----------+-----+
+ * | header | L1 table | cluster0 | cluster1 | ... |
+ * +--------+----------+----------+----------+-----+
+ *
+ * There is a 2-level pagetable for cluster allocation:
+ *
+ * +----------+
+ * | L1 table |
+ * +----------+
+ * ,------' | '------.
+ * +----------+ | +----------+
+ * | L2 table | ... | L2 table |
+ * +----------+ +----------+
+ * ,------' | '------.
+ * +----------+ | +----------+
+ * | Data | ... | Data |
+ * +----------+ +----------+
+ *
+ * The L1 table is fixed size and always present. L2 tables are allocated on
+ * demand. The L1 table size determines the maximum possible image size; it
+ * can be influenced using the cluster_size and table_size values.
+ *
+ * All fields are little-endian on disk.
+ */
+
+enum {
+ QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
+
+ /* The image supports a backing file */
+ QED_F_BACKING_FILE = 0x01,
+
+ /* The image needs a consistency check before use */
+ QED_F_NEED_CHECK = 0x02,
+
+ /* The backing file format must not be probed, treat as raw image */
+ QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
+
+ /* Feature bits must be used when the on-disk format changes */
+ QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
+ QED_F_NEED_CHECK |
+ QED_F_BACKING_FORMAT_NO_PROBE,
+ QED_COMPAT_FEATURE_MASK = 0, /* supported compat feature bits */
+ QED_AUTOCLEAR_FEATURE_MASK = 0, /* supported autoclear feature bits */
+
+ /* Data is stored in groups of sectors called clusters. Cluster size must
+ * be large to avoid keeping too much metadata. I/O requests that have
+ * sub-cluster size will require read-modify-write.
+ */
+ QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
+ QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
+ QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
+
+ /* Allocated clusters are tracked using a 2-level pagetable. Table size is
+ * a multiple of clusters so large maximum image sizes can be supported
+ * without jacking up the cluster size too much.
+ */
+ QED_MIN_TABLE_SIZE = 1, /* in clusters */
+ QED_MAX_TABLE_SIZE = 16,
+ QED_DEFAULT_TABLE_SIZE = 4,
+
+ /* Delay to flush and clean image after last allocating write completes */
+ QED_NEED_CHECK_TIMEOUT = 5, /* in seconds */
+};
+
+typedef struct {
+ uint32_t magic; /* QED\0 */
+
+ uint32_t cluster_size; /* in bytes */
+ uint32_t table_size; /* for L1 and L2 tables, in clusters */
+ uint32_t header_size; /* in clusters */
+
+ uint64_t features; /* format feature bits */
+ uint64_t compat_features; /* compatible feature bits */
+ uint64_t autoclear_features; /* self-resetting feature bits */
+
+ uint64_t l1_table_offset; /* in bytes */
+ uint64_t image_size; /* total logical image size, in bytes */
+
+ /* if (features & QED_F_BACKING_FILE) */
+ uint32_t backing_filename_offset; /* in bytes from start of header */
+ uint32_t backing_filename_size; /* in bytes */
+} QEDHeader;
+
+typedef struct {
+ uint64_t offsets[0]; /* in bytes */
+} QEDTable;
+
+/* The L2 cache is a simple write-through cache for L2 structures */
+typedef struct CachedL2Table {
+ QEDTable *table;
+ uint64_t offset; /* offset=0 indicates an invalid entry */
+ QTAILQ_ENTRY(CachedL2Table) node; /* linkage in L2TableCache.entries */
+ int ref;
+} CachedL2Table;
+
+typedef struct {
+ QTAILQ_HEAD(, CachedL2Table) entries;
+ unsigned int n_entries;
+} L2TableCache;
+
+typedef struct QEDRequest {
+ CachedL2Table *l2_table;
+} QEDRequest;
+
+enum {
+ QED_AIOCB_WRITE = 0x0001, /* read or write? */
+ QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */
+};
+
+typedef struct QEDAIOCB {
+ BlockDriverAIOCB common;
+ QEMUBH *bh;
+ int bh_ret; /* final return status for completion bh */
+ QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
+ int flags; /* QED_AIOCB_* bits ORed together */
+ bool *finished; /* signal for cancel completion */
+ uint64_t end_pos; /* request end on block device, in bytes */
+
+ /* User scatter-gather list */
+ QEMUIOVector *qiov;
+ size_t qiov_offset; /* byte count already processed */
+
+ /* Current cluster scatter-gather list */
+ QEMUIOVector cur_qiov;
+ uint64_t cur_pos; /* position on block device, in bytes */
+ uint64_t cur_cluster; /* cluster offset in image file */
+ unsigned int cur_nclusters; /* number of clusters being accessed */
+ int find_cluster_ret; /* used for L1/L2 update */
+
+ QEDRequest request;
+} QEDAIOCB;
+
+typedef struct {
+ BlockDriverState *bs; /* device */
+ uint64_t file_size; /* length of image file, in bytes */
+
+ QEDHeader header; /* always cpu-endian */
+ QEDTable *l1_table;
+ L2TableCache l2_cache; /* l2 table cache */
+ uint32_t table_nelems;
+ uint32_t l1_shift;
+ uint32_t l2_shift;
+ uint32_t l2_mask;
+
+ /* Allocating write request queue */
+ QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
+ bool allocating_write_reqs_plugged;
+
+ /* Periodic flush and clear need check flag */
+ QEMUTimer *need_check_timer;
+} BDRVQEDState;
+
+enum {
+ QED_CLUSTER_FOUND, /* cluster found */
+ QED_CLUSTER_ZERO, /* zero cluster found */
+ QED_CLUSTER_L2, /* cluster missing in L2 */
+ QED_CLUSTER_L1, /* cluster missing in L1 */
+};
+
+/**
+ * qed_find_cluster() completion callback
+ *
+ * @opaque: User data for completion callback
+ * @ret: QED_CLUSTER_FOUND Success
+ * QED_CLUSTER_L2 Data cluster unallocated in L2
+ * QED_CLUSTER_L1 L2 unallocated in L1
+ * -errno POSIX error occurred
+ * @offset: Data cluster offset
+ * @len: Contiguous bytes starting from cluster offset
+ *
+ * This function is invoked when qed_find_cluster() completes.
+ *
+ * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
+ * in the image file.
+ *
+ * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
+ * table offset, respectively. len is number of contiguous unallocated bytes.
+ */
+typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
+
+/**
+ * Generic callback for chaining async callbacks
+ */
+typedef struct {
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+} GenericCB;
+
+void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
+void gencb_complete(void *opaque, int ret);
+
+/**
+ * Header functions
+ */
+int qed_write_header_sync(BDRVQEDState *s);
+
+/**
+ * L2 cache functions
+ */
+void qed_init_l2_cache(L2TableCache *l2_cache);
+void qed_free_l2_cache(L2TableCache *l2_cache);
+CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
+void qed_unref_l2_cache_entry(CachedL2Table *entry);
+CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
+void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
+
+/**
+ * Table I/O functions
+ */
+int qed_read_l1_table_sync(BDRVQEDState *s);
+void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
+ unsigned int n);
+int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ uint64_t offset);
+void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush,
+ BlockDriverCompletionFunc *cb, void *opaque);
+int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
+ unsigned int index, unsigned int n, bool flush);
+
+/**
+ * Cluster functions
+ */
+void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
+ size_t len, QEDFindClusterFunc *cb, void *opaque);
+
+/**
+ * Consistency check
+ */
+int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
+
+QEDTable *qed_alloc_table(BDRVQEDState *s);
+
+/**
+ * Round down to the start of a cluster
+ */
+static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
+{
+ return offset & ~(uint64_t)(s->header.cluster_size - 1);
+}
+
+static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
+{
+ return offset & (s->header.cluster_size - 1);
+}
+
+/**
+ * Number of clusters needed to hold a byte count, rounding up
+ */
+static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
+{
+    /* Round up to a cluster boundary, then divide by the cluster size itself.
+     * Dividing by (cluster_size - 1) over-counts as soon as the result
+     * reaches cluster_size - 1 clusters.
+     */
+    return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
+           s->header.cluster_size;
+}
+
+static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
+{
+ return pos >> s->l1_shift;
+}
+
+static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
+{
+ return (pos >> s->l2_shift) & s->l2_mask;
+}
+
+/**
+ * Test if a cluster offset is valid
+ */
+static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
+{
+ uint64_t header_size = (uint64_t)s->header.header_size *
+ s->header.cluster_size;
+
+ if (offset & (s->header.cluster_size - 1)) {
+ return false;
+ }
+ return offset >= header_size && offset < s->file_size;
+}
+
+/**
+ * Test if a table offset is valid
+ *
+ * A table occupies table_size clusters starting at @offset.  Both the first
+ * and the last of those clusters must pass qed_check_cluster_offset().
+ */
+static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
+{
+    /* Widen before multiplying so the product is computed in 64 bits */
+    uint64_t end_offset = offset + (uint64_t)(s->header.table_size - 1) *
+                          s->header.cluster_size;
+
+    /* Overflow check.  end_offset == offset is legitimate: it is what a
+     * single-cluster table (table_size == QED_MIN_TABLE_SIZE) produces, so
+     * only a wrapped-around sum is rejected here.
+     */
+    if (end_offset < offset) {
+        return false;
+    }
+
+    return qed_check_cluster_offset(s, offset) &&
+           qed_check_cluster_offset(s, end_offset);
+}
+
+static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
+ uint64_t offset)
+{
+ if (qed_offset_into_cluster(s, offset)) {
+ return false;
+ }
+ return true;
+}
+
+static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
+{
+ if (offset == 0) {
+ return true;
+ }
+ return false;
+}
+
+static inline bool qed_offset_is_zero_cluster(uint64_t offset)
+{
+ if (offset == 1) {
+ return true;
+ }
+ return false;
+}
+
+#endif /* BLOCK_QED_H */
diff --git a/contrib/qemu/block/snapshot.c b/contrib/qemu/block/snapshot.c
new file mode 100644
index 000000000..6c6d9deea
--- /dev/null
+++ b/contrib/qemu/block/snapshot.c
@@ -0,0 +1,157 @@
+/*
+ * Block layer snapshot related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "block/snapshot.h"
+#include "block/block_int.h"
+
+/*
+ * Look up a snapshot of @bs whose ID string or name equals @name.
+ *
+ * On a match the snapshot's info is copied into *@sn_info and 0 is returned.
+ * Returns -ENOENT when nothing matches; a failure to list the snapshots is
+ * also reported as -ENOENT rather than as the underlying error.
+ */
+int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
+ const char *name)
+{
+ QEMUSnapshotInfo *sn_tab, *sn;
+ int nb_sns, i, ret;
+
+ ret = -ENOENT;
+ nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+ if (nb_sns < 0) {
+ return ret;
+ }
+ /* First match wins; the ID is compared before the name */
+ for (i = 0; i < nb_sns; i++) {
+ sn = &sn_tab[i];
+ if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
+ *sn_info = *sn;
+ ret = 0;
+ break;
+ }
+ }
+ /* The table returned by bdrv_snapshot_list() is released here */
+ g_free(sn_tab);
+ return ret;
+}
+
+int bdrv_can_snapshot(BlockDriverState *bs)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+ return 0;
+ }
+
+ if (!drv->bdrv_snapshot_create) {
+ if (bs->file != NULL) {
+ return bdrv_can_snapshot(bs->file);
+ }
+ return 0;
+ }
+
+ return 1;
+}
+
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_create) {
+ return drv->bdrv_snapshot_create(bs, sn_info);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_create(bs->file, sn_info);
+ }
+ return -ENOTSUP;
+}
+
+/*
+ * Switch @bs to the snapshot identified by @snapshot_id.
+ *
+ * If the driver implements bdrv_snapshot_goto the call is delegated to it.
+ * Otherwise, when @bs has an underlying bs->file, the driver is closed, the
+ * request is forwarded to bs->file, and the driver is opened again.
+ *
+ * Returns -ENOMEDIUM if @bs has no driver, -ENOTSUP if neither the driver nor
+ * an underlying file can handle the request, the reopen error if reopening
+ * fails, and otherwise the result of the delegated call.
+ */
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id)
+{
+ BlockDriver *drv = bs->drv;
+ int ret, open_ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_goto) {
+ return drv->bdrv_snapshot_goto(bs, snapshot_id);
+ }
+
+ if (bs->file) {
+ /* Close the format driver around the switch, then reopen it */
+ drv->bdrv_close(bs);
+ ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+ open_ret = drv->bdrv_open(bs, NULL, bs->open_flags);
+ if (open_ret < 0) {
+ /* Reopen failed: drop the file and leave bs without a driver */
+ bdrv_delete(bs->file);
+ bs->drv = NULL;
+ return open_ret;
+ }
+ return ret;
+ }
+
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_delete) {
+ return drv->bdrv_snapshot_delete(bs, snapshot_id);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_delete(bs->file, snapshot_id);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (drv->bdrv_snapshot_list) {
+ return drv->bdrv_snapshot_list(bs, psn_info);
+ }
+ if (bs->file) {
+ return bdrv_snapshot_list(bs->file, psn_info);
+ }
+ return -ENOTSUP;
+}
+
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name)
+{
+ BlockDriver *drv = bs->drv;
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (!bs->read_only) {
+ return -EINVAL;
+ }
+ if (drv->bdrv_snapshot_load_tmp) {
+ return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
+ }
+ return -ENOTSUP;
+}
diff --git a/contrib/qemu/config-host.h b/contrib/qemu/config-host.h
new file mode 100644
index 000000000..46b1595a8
--- /dev/null
+++ b/contrib/qemu/config-host.h
@@ -0,0 +1,73 @@
+/* Automatically generated by create_config - do not modify */
+#define CONFIG_QEMU_CONFDIR "/usr/local/etc/qemu"
+#define CONFIG_QEMU_DATADIR "/usr/local/share/qemu"
+#define CONFIG_QEMU_DOCDIR "/usr/local/share/doc/qemu"
+#define CONFIG_QEMU_LOCALSTATEDIR "/usr/local/var"
+#define CONFIG_QEMU_HELPERDIR "/usr/local/libexec"
+#define CONFIG_QEMU_LOCALEDIR "/usr/local/share/locale"
+#define HOST_X86_64 1
+#define CONFIG_QEMU_LDST_OPTIMIZATION 1
+#define CONFIG_POSIX 1
+#define CONFIG_LINUX 1
+#define CONFIG_SLIRP 1
+#define CONFIG_SMBD_COMMAND "/usr/sbin/smbd"
+#define CONFIG_AUDIO_DRIVERS \
+ &oss_audio_driver,\
+
+#define CONFIG_OSS 1
+#define CONFIG_BDRV_RW_WHITELIST\
+ NULL
+#define CONFIG_BDRV_RO_WHITELIST\
+ NULL
+#define CONFIG_VNC 1
+#define CONFIG_VNC_TLS 1
+#define CONFIG_VNC_SASL 1
+#define CONFIG_VNC_WS 1
+#define CONFIG_FNMATCH 1
+#define CONFIG_UUID 1
+#define CONFIG_XFS 1
+#define QEMU_VERSION "1.5.50"
+#define QEMU_PKGVERSION ""
+#define CONFIG_CURSES 1
+#define CONFIG_UTIMENSAT 1
+#define CONFIG_PIPE2 1
+#define CONFIG_ACCEPT4 1
+#define CONFIG_SPLICE 1
+#define CONFIG_EVENTFD 1
+#define CONFIG_FALLOCATE 1
+#define CONFIG_FALLOCATE_PUNCH_HOLE 1
+#define CONFIG_SYNC_FILE_RANGE 1
+#define CONFIG_FIEMAP 1
+#define CONFIG_DUP3 1
+#define CONFIG_EPOLL 1
+#define CONFIG_EPOLL_CREATE1 1
+#define CONFIG_EPOLL_PWAIT 1
+#define CONFIG_SENDFILE 1
+#define CONFIG_INOTIFY 1
+#define CONFIG_INOTIFY1 1
+#define CONFIG_BYTESWAP_H 1
+#define CONFIG_CURL 1
+#define CONFIG_LINUX_AIO 1
+#define CONFIG_ATTR 1
+#define CONFIG_VHOST_SCSI 1
+#define CONFIG_IOVEC 1
+#define CONFIG_PREADV 1
+#define CONFIG_FDT 1
+#define CONFIG_SIGNALFD 1
+#define CONFIG_FDATASYNC 1
+#define CONFIG_MADVISE 1
+#define CONFIG_POSIX_MADVISE 1
+#define CONFIG_SIGEV_THREAD_ID 1
+#define CONFIG_UNAME_RELEASE ""
+#define CONFIG_QOM_CAST_DEBUG 1
+#define CONFIG_COROUTINE_BACKEND ucontext
+#define CONFIG_OPEN_BY_HANDLE 1
+#define CONFIG_LINUX_MAGIC_H 1
+#define CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE 1
+#define CONFIG_HAS_ENVIRON 1
+#define CONFIG_CPUID_H 1
+#define CONFIG_INT128 1
+#define CONFIG_VIRTIO_BLK_DATA_PLANE $(CONFIG_VIRTIO)
+#define CONFIG_TRACE_NOP 1
+#define CONFIG_TRACE_FILE trace
+#define CONFIG_TRACE_DEFAULT 1
diff --git a/contrib/qemu/coroutine-ucontext.c b/contrib/qemu/coroutine-ucontext.c
new file mode 100644
index 000000000..4bf2cde27
--- /dev/null
+++ b/contrib/qemu/coroutine-ucontext.c
@@ -0,0 +1,225 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "block/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ sigjmp_buf env;
+
+#ifdef CONFIG_VALGRIND_H
+ unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+ /** Currently executing coroutine */
+ Coroutine *current;
+
+ /** The default coroutine */
+ CoroutineUContext leader;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+ void *p;
+ int i[2];
+};
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ if (!s) {
+ s = g_malloc0(sizeof(*s));
+ s->current = &s->leader.base;
+ pthread_setspecific(thread_state_key, s);
+ }
+ return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+ CoroutineThreadState *s = opaque;
+
+ g_free(s);
+}
+
+/*
+ * Create the thread-specific key that holds each thread's coroutine state.
+ * Registered as a constructor; aborts the process if the key cannot be
+ * created.  qemu_coroutine_thread_cleanup() is installed as the key's
+ * destructor.
+ */
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        /* pthread functions return the error number; errno is not set */
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(ret));
+        abort();
+    }
+}
+
+/*
+ * Entry function of the context built by makecontext() in
+ * qemu_coroutine_new().  The CoroutineUContext pointer arrives split across
+ * the two int arguments and is reassembled through union cc_arg.
+ */
+static void coroutine_trampoline(int i0, int i1)
+{
+ union cc_arg arg;
+ CoroutineUContext *self;
+ Coroutine *co;
+
+ arg.i[0] = i0;
+ arg.i[1] = i1;
+ self = arg.p;
+ co = &self->base;
+
+ /* Initialize longjmp environment and switch back to the caller.
+ * At this point entry_arg holds the creator's sigjmp_buf.
+ */
+ if (!sigsetjmp(self->env, 0)) {
+ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+ }
+
+ /* Reached when something later jumps to self->env: run the entry
+ * function, then hand control to the caller with COROUTINE_TERMINATE.
+ */
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ ucontext_t old_uc, uc;
+ sigjmp_buf old_env;
+ union cc_arg arg = {0};
+
+ /* The ucontext functions preserve signal masks which incurs a
+ * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
+ * preserve signal masks but only works on the current stack.
+ * Since we need a way to create and switch to a new stack, use
+ * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+ * everything else.
+ */
+
+ if (getcontext(&uc) == -1) {
+ abort();
+ }
+
+ co = g_malloc0(sizeof(*co));
+ co->stack = g_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ uc.uc_link = &old_uc;
+ uc.uc_stack.ss_sp = co->stack;
+ uc.uc_stack.ss_size = stack_size;
+ uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+ co->valgrind_stack_id =
+ VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+#endif
+
+ arg.p = co;
+
+ makecontext(&uc, (void (*)(void))coroutine_trampoline,
+ 2, arg.i[0], arg.i[1]);
+
+ /* swapcontext() in, siglongjmp() back out */
+ if (!sigsetjmp(old_env, 0)) {
+ swapcontext(&old_uc, &uc);
+ }
+ return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an unused variable in the valgrind.h macro... */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+ VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+ valgrind_stack_deregister(co);
+#endif
+
+ g_free(co->stack);
+ g_free(co);
+}
+
+/*
+ * Transfer control from coroutine @from_ to coroutine @to_.
+ *
+ * @to_ is recorded as the calling thread's current coroutine, the present
+ * execution state is saved in @from_'s jump buffer, and execution jumps to
+ * @to_'s saved state with @action as the value delivered there.
+ *
+ * Returns the value passed by whichever later siglongjmp() resumes @from_.
+ */
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ int ret;
+
+ s->current = to_;
+
+ /* ret == 0 on the direct call; non-zero when resumed via siglongjmp() */
+ ret = sigsetjmp(from->env, 0);
+ if (ret == 0) {
+ siglongjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+
+ return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ return s && s->current->caller;
+}
diff --git a/contrib/qemu/include/block/aio.h b/contrib/qemu/include/block/aio.h
new file mode 100644
index 000000000..183679374
--- /dev/null
+++ b/contrib/qemu/include/block/aio.h
@@ -0,0 +1,247 @@
+/*
+ * QEMU aio implementation
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_AIO_H
+#define QEMU_AIO_H
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/event_notifier.h"
+
+typedef struct BlockDriverAIOCB BlockDriverAIOCB;
+typedef void BlockDriverCompletionFunc(void *opaque, int ret);
+
+typedef struct AIOCBInfo {
+ void (*cancel)(BlockDriverAIOCB *acb);
+ size_t aiocb_size;
+} AIOCBInfo;
+
+struct BlockDriverAIOCB {
+ const AIOCBInfo *aiocb_info;
+ BlockDriverState *bs;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+};
+
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void qemu_aio_release(void *p);
+
+typedef struct AioHandler AioHandler;
+typedef void QEMUBHFunc(void *opaque);
+typedef void IOHandler(void *opaque);
+
+typedef struct AioContext {
+ GSource source;
+
+ /* The list of registered AIO handlers */
+ QLIST_HEAD(, AioHandler) aio_handlers;
+
+ /* This is a simple lock used to protect the aio_handlers list.
+ * Specifically, it's used to ensure that no callbacks are removed while
+ * we're walking and dispatching callbacks.
+ */
+ int walking_handlers;
+
+ /* Anchor of the list of Bottom Halves belonging to the context */
+ struct QEMUBH *first_bh;
+
+ /* A simple lock used to protect the first_bh list, and ensure that
+ * no callbacks are removed while we're walking and dispatching callbacks.
+ */
+ int walking_bh;
+
+ /* Used for aio_notify. */
+ EventNotifier notifier;
+
+ /* GPollFDs for aio_poll() */
+ GArray *pollfds;
+
+ /* Thread pool for performing work and receiving completion callbacks */
+ struct ThreadPool *thread_pool;
+} AioContext;
+
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushEventNotifierHandler)(EventNotifier *e);
+
+/**
+ * aio_context_new: Allocate a new AioContext.
+ *
+ * AioContexts provide a mini event-loop that can be waited on synchronously.
+ * They also provide bottom halves, a service to execute a piece of code
+ * as soon as possible.
+ */
+AioContext *aio_context_new(void);
+
+/**
+ * aio_context_ref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Add a reference to an AioContext.
+ */
+void aio_context_ref(AioContext *ctx);
+
+/**
+ * aio_context_unref:
+ * @ctx: The AioContext to operate on.
+ *
+ * Drop a reference to an AioContext.
+ */
+void aio_context_unref(AioContext *ctx);
+
+/**
+ * aio_bh_new: Allocate a new bottom half structure.
+ *
+ * Bottom halves are lightweight callbacks whose invocation is guaranteed
+ * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
+ * is opaque and must be allocated prior to its use.
+ */
+QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
+/**
+ * aio_notify: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, aio_notify forces
+ * aio_wait to exit, so that the next call will re-examine pending events.
+ * The caller of aio_notify will usually call aio_wait again very soon,
+ * or go through another iteration of the GLib main loop. Hence, aio_notify
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling aio_notify is rarely necessary, because for example scheduling
+ * a bottom half calls it already.
+ */
+void aio_notify(AioContext *ctx);
+
+/**
+ * aio_bh_poll: Poll bottom halves for an AioContext.
+ *
+ * These are internal functions used by the QEMU main loop.
+ */
+int aio_bh_poll(AioContext *ctx);
+
+/**
+ * qemu_bh_schedule: Schedule a bottom half.
+ *
+ * Scheduling a bottom half interrupts the main loop and causes the
+ * execution of the callback that was passed to qemu_bh_new.
+ *
+ * Bottom halves that are scheduled from a bottom half handler are instantly
+ * invoked. This can create an infinite loop if a bottom half handler
+ * schedules itself.
+ *
+ * @bh: The bottom half to be scheduled.
+ */
+void qemu_bh_schedule(QEMUBH *bh);
+
+/**
+ * qemu_bh_cancel: Cancel execution of a bottom half.
+ *
+ * Canceling execution of a bottom half undoes the effect of calls to
+ * qemu_bh_schedule without freeing its resources yet. While cancellation
+ * itself is also wait-free and thread-safe, it can of course race with the
+ * loop that executes bottom halves unless you are holding the iothread
+ * mutex. This makes it mostly useless if you are not holding the mutex.
+ *
+ * @bh: The bottom half to be canceled.
+ */
+void qemu_bh_cancel(QEMUBH *bh);
+
+/**
+ * qemu_bh_delete: Cancel execution of a bottom half and free its resources.
+ *
+ * Deleting a bottom half frees the memory that was allocated for it by
+ * qemu_bh_new. It also implies canceling the bottom half if it was
+ * scheduled.
+ *
+ * @bh: The bottom half to be deleted.
+ */
+void qemu_bh_delete(QEMUBH *bh);
+
+/* Return whether there are any pending callbacks from the GSource
+ * attached to the AioContext.
+ *
+ * This is used internally in the implementation of the GSource.
+ */
+bool aio_pending(AioContext *ctx);
+
+/* Make progress in completing AIO work. This can issue new pending
+ * aio as a result of executing I/O completion or bh callbacks.
+ *
+ * If there is no pending AIO operation or completion (bottom half),
+ * return false. If there are pending AIO operations or bottom halves,
+ * return true.
+ *
+ * If there are no pending bottom halves, but there are pending AIO
+ * operations, it may not be possible to make any progress without
+ * blocking. If @blocking is true, this function will wait until one
+ * or more AIO events have completed, to ensure something has moved
+ * before returning.
+ */
+bool aio_poll(AioContext *ctx, bool blocking);
+
+#ifdef CONFIG_POSIX
+/* Returns 1 if there are still outstanding AIO requests; 0 otherwise */
+typedef int (AioFlushHandler)(void *opaque);
+
+/* Register a file descriptor and associated callbacks. Behaves very similarly
+ * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
+ * be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of qemu_set_fd_handler[2].
+ */
+void aio_set_fd_handler(AioContext *ctx,
+ int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+/* Register an event notifier and associated callbacks. Behaves very similarly
+ * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
+ * will be invoked when using qemu_aio_wait().
+ *
+ * Code that invokes AIO completion functions should rely on this function
+ * instead of event_notifier_set_handler.
+ */
+void aio_set_event_notifier(AioContext *ctx,
+ EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+/* Return a GSource that lets the main loop poll the file descriptors attached
+ * to this AioContext.
+ */
+GSource *aio_get_g_source(AioContext *ctx);
+
+/* Return the ThreadPool bound to this AioContext */
+struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
+
+/* Functions to operate on the main QEMU AioContext. */
+
+bool qemu_aio_wait(void);
+void qemu_aio_set_event_notifier(EventNotifier *notifier,
+ EventNotifierHandler *io_read,
+ AioFlushEventNotifierHandler *io_flush);
+
+#ifdef CONFIG_POSIX
+void qemu_aio_set_fd_handler(int fd,
+ IOHandler *io_read,
+ IOHandler *io_write,
+ AioFlushHandler *io_flush,
+ void *opaque);
+#endif
+
+#endif
diff --git a/contrib/qemu/include/block/block.h b/contrib/qemu/include/block/block.h
new file mode 100644
index 000000000..b6b9014a9
--- /dev/null
+++ b/contrib/qemu/include/block/block.h
@@ -0,0 +1,443 @@
+#ifndef BLOCK_H
+#define BLOCK_H
+
+#include "block/aio.h"
+#include "qemu-common.h"
+#include "qemu/option.h"
+#include "block/coroutine.h"
+#include "qapi/qmp/qobject.h"
+#include "qapi-types.h"
+
+/* block.c */
+typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
+
+typedef struct BlockDriverInfo {
+ /* in bytes, 0 if irrelevant */
+ int cluster_size;
+ /* offset at which the VM state can be saved (0 if not possible) */
+ int64_t vm_state_offset;
+ bool is_dirty;
+} BlockDriverInfo;
+
+typedef struct BlockFragInfo {
+ uint64_t allocated_clusters;
+ uint64_t total_clusters;
+ uint64_t fragmented_clusters;
+ uint64_t compressed_clusters;
+} BlockFragInfo;
+
+/* Callbacks for block device models */
+typedef struct BlockDevOps {
+ /*
+ * Runs when virtual media changed (monitor commands eject, change)
+ * Argument load is true on load and false on eject.
+ * Beware: doesn't run when a host device's physical media
+ * changes. Sure would be useful if it did.
+ * Device models with removable media must implement this callback.
+ */
+ void (*change_media_cb)(void *opaque, bool load);
+ /*
+ * Runs when an eject request is issued from the monitor, the tray
+ * is closed, and the medium is locked.
+ * Device models that do not implement is_medium_locked will not need
+ * this callback. Device models that can lock the medium or tray might
+ * want to implement the callback and unlock the tray when "force" is
+ * true, even if they do not support eject requests.
+ */
+ void (*eject_request_cb)(void *opaque, bool force);
+ /*
+ * Is the virtual tray open?
+ * Device models implement this only when the device has a tray.
+ */
+ bool (*is_tray_open)(void *opaque);
+ /*
+ * Is the virtual medium locked into the device?
+ * Device models implement this only when device has such a lock.
+ */
+ bool (*is_medium_locked)(void *opaque);
+ /*
+ * Runs when the size changed (e.g. monitor command block_resize)
+ */
+ void (*resize_cb)(void *opaque);
+} BlockDevOps;
+
+#define BDRV_O_RDWR 0x0002
+#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
+#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
+#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
+#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
+#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
+#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
+#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
+#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
+
+#define BDRV_SECTOR_BITS 9
+#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
+#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1)
+
+typedef enum {
+ BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
+} BlockErrorAction;
+
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+ BlockDriverState *bs;
+ int flags;
+ void *opaque;
+} BDRVReopenState;
+
+
+void bdrv_iostatus_enable(BlockDriverState *bs);
+void bdrv_iostatus_reset(BlockDriverState *bs);
+void bdrv_iostatus_disable(BlockDriverState *bs);
+bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
+void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
+void bdrv_info_print(Monitor *mon, const QObject *data);
+void bdrv_info(Monitor *mon, QObject **ret_data);
+void bdrv_stats_print(Monitor *mon, const QObject *data);
+void bdrv_info_stats(Monitor *mon, QObject **ret_data);
+
+/* disk I/O throttling */
+void bdrv_io_limits_enable(BlockDriverState *bs);
+void bdrv_io_limits_disable(BlockDriverState *bs);
+bool bdrv_io_limits_enabled(BlockDriverState *bs);
+
+void bdrv_init(void);
+void bdrv_init_with_whitelist(void);
+BlockDriver *bdrv_find_protocol(const char *filename,
+ bool allow_protocol_prefix);
+BlockDriver *bdrv_find_format(const char *format_name);
+BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
+ bool readonly);
+int bdrv_create(BlockDriver *drv, const char* filename,
+ QEMUOptionParameter *options);
+int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
+BlockDriverState *bdrv_new(const char *device_name);
+void bdrv_make_anon(BlockDriverState *bs);
+void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
+void bdrv_delete(BlockDriverState *bs);
+int bdrv_parse_cache_flags(const char *mode, int *flags);
+int bdrv_parse_discard_flags(const char *mode, int *flags);
+int bdrv_file_open(BlockDriverState **pbs, const char *filename,
+ QDict *options, int flags);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *options);
+int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options,
+ int flags, BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+void bdrv_close(BlockDriverState *bs);
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
+int bdrv_attach_dev(BlockDriverState *bs, void *dev);
+void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
+void bdrv_detach_dev(BlockDriverState *bs, void *dev);
+void *bdrv_get_attached_dev(BlockDriverState *bs);
+void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
+ void *opaque);
+void bdrv_dev_eject_request(BlockDriverState *bs, bool force);
+bool bdrv_dev_has_removable_media(BlockDriverState *bs);
+bool bdrv_dev_is_tray_open(BlockDriverState *bs);
+bool bdrv_dev_is_medium_locked(BlockDriverState *bs);
+int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
+int bdrv_pread(BlockDriverState *bs, int64_t offset,
+ void *buf, int count);
+int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov);
+int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
+ const void *buf, int count);
+int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum);
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ int64_t sector_num,
+ int nb_sectors, int *pnum);
+BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
+ const char *backing_file);
+int bdrv_get_backing_file_depth(BlockDriverState *bs);
+int bdrv_truncate(BlockDriverState *bs, int64_t offset);
+int64_t bdrv_getlength(BlockDriverState *bs);
+int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
+void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
+int bdrv_commit(BlockDriverState *bs);
+int bdrv_commit_all(void);
+int bdrv_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+ BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+ BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+
+
+typedef struct BdrvCheckResult {
+ int corruptions;
+ int leaks;
+ int check_errors;
+ int corruptions_fixed;
+ int leaks_fixed;
+ int64_t image_end_offset;
+ BlockFragInfo bfi;
+} BdrvCheckResult;
+
+typedef enum {
+ BDRV_FIX_LEAKS = 1,
+ BDRV_FIX_ERRORS = 2,
+} BdrvCheckMode;
+
+int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+
+/* async block I/O */
+typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
+ int sector_num);
+BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
+ QEMUIOVector *iov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+void bdrv_aio_cancel(BlockDriverAIOCB *acb);
+
+typedef struct BlockRequest {
+ /* Fields to be filled by multiwrite caller */
+ int64_t sector;
+ int nb_sectors;
+ QEMUIOVector *qiov;
+ BlockDriverCompletionFunc *cb;
+ void *opaque;
+
+ /* Filled by multiwrite implementation */
+ int error;
+} BlockRequest;
+
+int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs,
+ int num_reqs);
+
+/* sg packet commands */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf);
+BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+/* Invalidate any cached metadata used by image formats */
+void bdrv_invalidate_cache(BlockDriverState *bs);
+void bdrv_invalidate_cache_all(void);
+
+void bdrv_clear_incoming_migration_all(void);
+
+/* Ensure contents are flushed to disk. */
+int bdrv_flush(BlockDriverState *bs);
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
+int bdrv_flush_all(void);
+void bdrv_close_all(void);
+void bdrv_drain_all(void);
+
+int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
+int bdrv_has_zero_init_1(BlockDriverState *bs);
+int bdrv_has_zero_init(BlockDriverState *bs);
+int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ int *pnum);
+int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+ bool is_read, int error);
+int bdrv_is_read_only(BlockDriverState *bs);
+int bdrv_is_sg(BlockDriverState *bs);
+int bdrv_enable_write_cache(BlockDriverState *bs);
+void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
+int bdrv_is_inserted(BlockDriverState *bs);
+int bdrv_media_changed(BlockDriverState *bs);
+void bdrv_lock_medium(BlockDriverState *bs, bool locked);
+void bdrv_eject(BlockDriverState *bs, bool eject_flag);
+const char *bdrv_get_format_name(BlockDriverState *bs);
+BlockDriverState *bdrv_find(const char *name);
+BlockDriverState *bdrv_next(BlockDriverState *bs);
+void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs),
+ void *opaque);
+int bdrv_is_encrypted(BlockDriverState *bs);
+int bdrv_key_required(BlockDriverState *bs);
+int bdrv_set_key(BlockDriverState *bs, const char *key);
+int bdrv_query_missing_keys(void);
+void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
+ void *opaque);
+const char *bdrv_get_device_name(BlockDriverState *bs);
+int bdrv_get_flags(BlockDriverState *bs);
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors);
+
+const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
+void bdrv_get_backing_filename(BlockDriverState *bs,
+ char *filename, int filename_size);
+void bdrv_get_full_backing_filename(BlockDriverState *bs,
+ char *dest, size_t sz);
+int bdrv_is_snapshot(BlockDriverState *bs);
+
+int path_is_absolute(const char *path);
+void path_combine(char *dest, int dest_size,
+ const char *base_path,
+ const char *filename);
+
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size);
+
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+void bdrv_img_create(const char *filename, const char *fmt,
+ const char *base_filename, const char *base_fmt,
+ char *options, uint64_t img_size, int flags,
+ Error **errp, bool quiet);
+
+void bdrv_set_buffer_alignment(BlockDriverState *bs, int align);
+void *qemu_blockalign(BlockDriverState *bs, size_t size);
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
+
+struct HBitmapIter;
+void bdrv_set_dirty_tracking(BlockDriverState *bs, int granularity);
+int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_dirty_iter_init(BlockDriverState *bs, struct HBitmapIter *hbi);
+int64_t bdrv_get_dirty_count(BlockDriverState *bs);
+
+void bdrv_enable_copy_on_read(BlockDriverState *bs);
+void bdrv_disable_copy_on_read(BlockDriverState *bs);
+
+void bdrv_set_in_use(BlockDriverState *bs, int in_use);
+int bdrv_in_use(BlockDriverState *bs);
+
+#ifdef CONFIG_LINUX_AIO
+int raw_get_aio_fd(BlockDriverState *bs);
+#else
+static inline int raw_get_aio_fd(BlockDriverState *bs)
+{
+ return -ENOTSUP;
+}
+#endif
+
+enum BlockAcctType {
+ BDRV_ACCT_READ,
+ BDRV_ACCT_WRITE,
+ BDRV_ACCT_FLUSH,
+ BDRV_MAX_IOTYPE,
+};
+
+typedef struct BlockAcctCookie {
+ int64_t bytes;
+ int64_t start_time_ns;
+ enum BlockAcctType type;
+} BlockAcctCookie;
+
+void bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
+ int64_t bytes, enum BlockAcctType type);
+void bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie);
+
+typedef enum {
+ BLKDBG_L1_UPDATE,
+
+ BLKDBG_L1_GROW_ALLOC_TABLE,
+ BLKDBG_L1_GROW_WRITE_TABLE,
+ BLKDBG_L1_GROW_ACTIVATE_TABLE,
+
+ BLKDBG_L2_LOAD,
+ BLKDBG_L2_UPDATE,
+ BLKDBG_L2_UPDATE_COMPRESSED,
+ BLKDBG_L2_ALLOC_COW_READ,
+ BLKDBG_L2_ALLOC_WRITE,
+
+ BLKDBG_READ_AIO,
+ BLKDBG_READ_BACKING_AIO,
+ BLKDBG_READ_COMPRESSED,
+
+ BLKDBG_WRITE_AIO,
+ BLKDBG_WRITE_COMPRESSED,
+
+ BLKDBG_VMSTATE_LOAD,
+ BLKDBG_VMSTATE_SAVE,
+
+ BLKDBG_COW_READ,
+ BLKDBG_COW_WRITE,
+
+ BLKDBG_REFTABLE_LOAD,
+ BLKDBG_REFTABLE_GROW,
+
+ BLKDBG_REFBLOCK_LOAD,
+ BLKDBG_REFBLOCK_UPDATE,
+ BLKDBG_REFBLOCK_UPDATE_PART,
+ BLKDBG_REFBLOCK_ALLOC,
+ BLKDBG_REFBLOCK_ALLOC_HOOKUP,
+ BLKDBG_REFBLOCK_ALLOC_WRITE,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
+ BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
+ BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
+
+ BLKDBG_CLUSTER_ALLOC,
+ BLKDBG_CLUSTER_ALLOC_BYTES,
+ BLKDBG_CLUSTER_FREE,
+
+ BLKDBG_FLUSH_TO_OS,
+ BLKDBG_FLUSH_TO_DISK,
+
+ BLKDBG_EVENT_MAX,
+} BlkDebugEvent;
+
+#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
+void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
+
+int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
+ const char *tag);
+int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
+bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
+
+#endif
diff --git a/contrib/qemu/include/block/block_int.h b/contrib/qemu/include/block/block_int.h
new file mode 100644
index 000000000..c6ac871e2
--- /dev/null
+++ b/contrib/qemu/include/block/block_int.h
@@ -0,0 +1,421 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCK_INT_H
+#define BLOCK_INT_H
+
+#include "block/block.h"
+#include "qemu/option.h"
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+#include "qemu/timer.h"
+#include "qapi-types.h"
+#include "qapi/qmp/qerror.h"
+#include "monitor/monitor.h"
+#include "qemu/hbitmap.h"
+#include "block/snapshot.h"
+
+#define BLOCK_FLAG_ENCRYPT 1
+#define BLOCK_FLAG_COMPAT6 4
+#define BLOCK_FLAG_LAZY_REFCOUNTS 8
+
+#define BLOCK_IO_LIMIT_READ 0
+#define BLOCK_IO_LIMIT_WRITE 1
+#define BLOCK_IO_LIMIT_TOTAL 2
+
+#define BLOCK_IO_SLICE_TIME 100000000
+#define NANOSECONDS_PER_SECOND 1000000000.0
+
+#define BLOCK_OPT_SIZE "size"
+#define BLOCK_OPT_ENCRYPT "encryption"
+#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_BACKING_FILE "backing_file"
+#define BLOCK_OPT_BACKING_FMT "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE "table_size"
+#define BLOCK_OPT_PREALLOC "preallocation"
+#define BLOCK_OPT_SUBFMT "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts"
+#define BLOCK_OPT_ADAPTER_TYPE "adapter_type"
+
+typedef struct BdrvTrackedRequest {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ int nb_sectors;
+ bool is_write;
+ QLIST_ENTRY(BdrvTrackedRequest) list;
+ Coroutine *co; /* owner, used for deadlock detection */
+ CoQueue wait_queue; /* coroutines blocked on this request */
+} BdrvTrackedRequest;
+
+
+typedef struct BlockIOLimit {
+ int64_t bps[3];
+ int64_t iops[3];
+} BlockIOLimit;
+
+typedef struct BlockIOBaseValue {
+ uint64_t bytes[2];
+ uint64_t ios[2];
+} BlockIOBaseValue;
+
+struct BlockDriver {
+ const char *format_name;
+ int instance_size;
+ int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
+ int (*bdrv_probe_device)(const char *filename);
+
+ /* Any driver implementing this callback is expected to be able to handle
+ * NULL file names in its .bdrv_open() implementation */
+ void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
+
+ /* For handling image reopen for split or non-split files */
+ int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp);
+ void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+ void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
+ int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags);
+ int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags);
+ int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors);
+ int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+ void (*bdrv_close)(BlockDriverState *bs);
+ void (*bdrv_rebind)(BlockDriverState *bs);
+ int (*bdrv_create)(const char *filename, QEMUOptionParameter *options);
+ int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
+ int (*bdrv_make_empty)(BlockDriverState *bs);
+ /* aio */
+ BlockDriverAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque);
+ BlockDriverAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum);
+
+ /*
+ * Invalidate any cached meta-data.
+ */
+ void (*bdrv_invalidate_cache)(BlockDriverState *bs);
+
+ /*
+ * Flushes all data that was already written to the OS all the way down to
+ * the disk (for example raw-posix calls fsync()).
+ */
+ int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);
+
+ /*
+ * Flushes all internal caches to the OS. The data may still sit in a
+ * writeback cache of the host OS, but it will survive a crash of the qemu
+ * process.
+ */
+ int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);
+
+ const char *protocol_name;
+ int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
+ int64_t (*bdrv_getlength)(BlockDriverState *bs);
+ int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
+ int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors);
+
+ int (*bdrv_snapshot_create)(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+ int (*bdrv_snapshot_goto)(BlockDriverState *bs,
+ const char *snapshot_id);
+ int (*bdrv_snapshot_delete)(BlockDriverState *bs, const char *snapshot_id);
+ int (*bdrv_snapshot_list)(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+ int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
+ const char *snapshot_name);
+ int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
+
+ int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos);
+ int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size);
+
+ int (*bdrv_change_backing_file)(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt);
+
+ /* removable device specific */
+ int (*bdrv_is_inserted)(BlockDriverState *bs);
+ int (*bdrv_media_changed)(BlockDriverState *bs);
+ void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
+ void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
+
+ /* to control generic scsi devices */
+ int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
+ BlockDriverAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
+ unsigned long int req, void *buf,
+ BlockDriverCompletionFunc *cb, void *opaque);
+
+ /* List of options for creating images, terminated by name == NULL */
+ QEMUOptionParameter *create_options;
+
+
+ /*
+ * Returns 0 for completed check, -errno for internal errors.
+ * The check results are stored in result.
+ */
+ int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
+
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
+
+ /* TODO Better pass an option string/QDict/QemuOpts to add any rule? */
+ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
+ const char *tag);
+ int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
+ bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);
+
+ /*
+ * Returns 1 if newly created images are guaranteed to contain only
+ * zeros, 0 otherwise.
+ */
+ int (*bdrv_has_zero_init)(BlockDriverState *bs);
+
+ QLIST_ENTRY(BlockDriver) list;
+};
+
+/*
+ * Note: the function bdrv_append() copies and swaps contents of
+ * BlockDriverStates, so if you add new fields to this struct, please
+ * inspect bdrv_append() to determine if the new fields need to be
+ * copied as well.
+ */
+struct BlockDriverState {
+ int64_t total_sectors; /* if we are reading a disk image, give its
+ size in sectors */
+ int read_only; /* if true, the media is read only */
+ int open_flags; /* flags used to open the file, re-used for re-open */
+ int encrypted; /* if true, the media is encrypted */
+ int valid_key; /* if true, a valid encryption key has been set */
+ int sg; /* if true, the device is a /dev/sg* */
+ int copy_on_read; /* if true, copy read backing sectors into image
+ note this is a reference count */
+
+ BlockDriver *drv; /* NULL means no media */
+ void *opaque;
+
+ void *dev; /* attached device model, if any */
+ /* TODO change to DeviceState when all users are qdevified */
+ const BlockDevOps *dev_ops;
+ void *dev_opaque;
+
+ char filename[1024];
+ char backing_file[1024]; /* if non zero, the image is a diff of
+ this file image */
+ char backing_format[16]; /* if non-zero and backing_file exists */
+ int is_temporary;
+
+ BlockDriverState *backing_hd;
+ BlockDriverState *file;
+
+ NotifierList close_notifiers;
+
+ /* Callback before write request is processed */
+ NotifierWithReturnList before_write_notifiers;
+
+ /* number of in-flight copy-on-read requests */
+ unsigned int copy_on_read_in_flight;
+
+ /* the time for latest disk I/O */
+ int64_t slice_start;
+ int64_t slice_end;
+ BlockIOLimit io_limits;
+ BlockIOBaseValue slice_submitted;
+ CoQueue throttled_reqs;
+ QEMUTimer *block_timer;
+ bool io_limits_enabled;
+
+ /* I/O stats (display with "info blockstats"). */
+ uint64_t nr_bytes[BDRV_MAX_IOTYPE];
+ uint64_t nr_ops[BDRV_MAX_IOTYPE];
+ uint64_t total_time_ns[BDRV_MAX_IOTYPE];
+ uint64_t wr_highest_sector;
+
+ /* Whether the disk can expand beyond total_sectors */
+ int growable;
+
+ /* the memory alignment required for the buffers handled by this driver */
+ int buffer_alignment;
+
+ /* do we need to tell the guest if we have a volatile write cache? */
+ int enable_write_cache;
+
+ /* NOTE: the following infos are only hints for real hardware
+ drivers. They are not used by the block driver */
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
+ char device_name[32];
+ HBitmap *dirty_bitmap;
+ int in_use; /* users other than guest access, eg. block migration */
+ QTAILQ_ENTRY(BlockDriverState) list;
+
+ QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
+
+ /* long-running background operation */
+ BlockJob *job;
+
+ QDict *options;
+};
+
+int get_tmp_filename(char *filename, int size);
+
+void bdrv_set_io_limits(BlockDriverState *bs,
+ BlockIOLimit *io_limits);
+
+/**
+ * bdrv_add_before_write_notifier:
+ *
+ * Register a callback that is invoked before write requests are processed but
+ * after any throttling or waiting for overlapping requests.
+ */
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+ NotifierWithReturn *notifier);
+
+/**
+ * bdrv_get_aio_context:
+ *
+ * Returns: the currently bound #AioContext
+ */
+AioContext *bdrv_get_aio_context(BlockDriverState *bs);
+
+#ifdef _WIN32
+int is_windows_drive(const char *filename);
+#endif
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+ enum MonitorEvent ev,
+ BlockErrorAction action, bool is_read);
+
+/**
+ * stream_start:
+ * @bs: Block device to operate on.
+ * @base: Block device that will become the new base, or %NULL to
+ * flatten the whole backing file chain onto @bs.
+ * @base_id: The file name that will be written to @bs as the new
+ * backing file if the job completes. Ignored if @base is %NULL.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a streaming operation on @bs. Clusters that are unallocated
+ * in @bs, but allocated in any image between @base and @bs (both
+ * exclusive) will be written to @bs. At the end of a successful
+ * streaming job, the backing file of @bs will be changed to
+ * @base_id in the written image and to @base in the live BlockDriverState.
+ */
+void stream_start(BlockDriverState *bs, BlockDriverState *base,
+ const char *base_id, int64_t speed, BlockdevOnError on_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+ BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * mirror_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @granularity: The chosen granularity for the dirty bitmap.
+ * @buf_size: The amount of data that can be in flight at one time.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs. Clusters that are allocated
+ * in @bs will be written to @target until the job is cancelled or
+ * manually completed. At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, int64_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/*
+ * backup_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ *
+ * Start a backup operation on @bs. Clusters in @bs are written to @target
+ * until the job is cancelled or manually completed.
+ */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+ int64_t speed, BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb, void *opaque,
+ Error **errp);
+
+#endif /* BLOCK_INT_H */
diff --git a/contrib/qemu/include/block/blockjob.h b/contrib/qemu/include/block/blockjob.h
new file mode 100644
index 000000000..c290d07bb
--- /dev/null
+++ b/contrib/qemu/include/block/blockjob.h
@@ -0,0 +1,278 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block/block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+ /** Derived BlockJob struct size */
+ size_t instance_size;
+
+ /** String describing the operation, part of query-block-jobs QMP API */
+ const char *job_type;
+
+ /** Optional callback for job types that support setting a speed limit */
+ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+
+ /** Optional callback for job types that need to forward I/O status reset */
+ void (*iostatus_reset)(BlockJob *job);
+
+ /**
+ * Optional callback for job types whose completion must be triggered
+ * manually.
+ */
+ void (*complete)(BlockJob *job, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+ /** The job type, including the job vtable. */
+ const BlockJobType *job_type;
+
+ /** The block device on which the job is operating. */
+ BlockDriverState *bs;
+
+ /**
+ * The coroutine that executes the job. If not NULL, it is
+ * reentered when busy is false and the job is cancelled.
+ */
+ Coroutine *co;
+
+ /**
+ * Set to true if the job should cancel itself. The flag must
+ * always be tested just before toggling the busy flag from false
+ * to true. After a job has been cancelled, it should only yield
+ * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+ */
+ bool cancelled;
+
+ /**
+ * Set to true if the job is either paused, or will pause itself
+ * as soon as possible (if busy == true).
+ */
+ bool paused;
+
+ /**
+ * Set to false by the job while it is in a quiescent state, where
+ * no I/O is pending and the job has yielded on any condition
+ * that is not detected by #qemu_aio_wait, such as a timer.
+ */
+ bool busy;
+
+ /** Status that is published by the query-block-jobs QMP API */
+ BlockDeviceIoStatus iostatus;
+
+ /** Offset that is published by the query-block-jobs QMP API */
+ int64_t offset;
+
+ /** Length that is published by the query-block-jobs QMP API */
+ int64_t len;
+
+ /** Speed that was set with @block_job_set_speed. */
+ int64_t speed;
+
+ /** The completion function that will be called when the job completes. */
+ BlockDriverCompletionFunc *cb;
+
+ /** The opaque value that is passed to the completion function. */
+ void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block device on which the job will operate.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it. The job
+ * will call @cb asynchronously when the job completes. Note that
+ * @bs may have been closed by the time @cb is called. If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+ int64_t speed, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds. Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_completed:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_completed(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value of the rate-limiting parameter.
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_complete:
+ * @job: The job to be completed.
+ * @errp: Error object.
+ *
+ * Asynchronously complete the specified job.
+ */
+void block_job_complete(BlockJob *job, Error **errp);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * qobject_from_block_job:
+ * @job: The job whose information is requested.
+ *
+ * Return a QDict corresponding to @job's query-block-jobs entry.
+ */
+QObject *qobject_from_block_job(BlockJob *job);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ *
+ * Send a BLOCK_JOB_READY event for the specified job.
+ */
+void block_job_ready(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job. The completion callback is called
+ * before the function returns. The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job and on BlockDriverState objects it uses,
+ * other than job->bs.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM. Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+ BlockdevOnError on_err,
+ int is_read, int error);
+#endif
diff --git a/contrib/qemu/include/block/coroutine.h b/contrib/qemu/include/block/coroutine.h
new file mode 100644
index 000000000..377805a3b
--- /dev/null
+++ b/contrib/qemu/include/block/coroutine.h
@@ -0,0 +1,218 @@
+/*
+ * QEMU coroutine implementation
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_COROUTINE_H
+#define QEMU_COROUTINE_H
+
+#include <stdbool.h>
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+
+/**
+ * Coroutines are a mechanism for stack switching and can be used for
+ * cooperative userspace threading. These functions provide a simple but
+ * useful flavor of coroutines that is suitable for writing sequential code,
+ * rather than callbacks, for operations that need to give up control while
+ * waiting for events to complete.
+ *
+ * These functions are re-entrant and may be used outside the global mutex.
+ */
+
+/**
+ * Mark a function that executes in coroutine context
+ *
+ * Functions that execute in coroutine context cannot be called directly from
+ * normal functions. In the future it would be nice to enable compiler or
+ * static checker support for catching such errors. This annotation might make
+ * it possible and in the meantime it serves as documentation.
+ *
+ * For example:
+ *
+ * static void coroutine_fn foo(void) {
+ * ....
+ * }
+ */
+#define coroutine_fn
+
+typedef struct Coroutine Coroutine;
+
+/**
+ * Coroutine entry point
+ *
+ * When the coroutine is entered for the first time, opaque is passed in as an
+ * argument.
+ *
+ * When this function returns, the coroutine is destroyed automatically and
+ * execution continues in the caller who last entered the coroutine.
+ */
+typedef void coroutine_fn CoroutineEntry(void *opaque);
+
+/**
+ * Create a new coroutine
+ *
+ * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
+ */
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
+
+/**
+ * Transfer control to a coroutine
+ *
+ * The opaque argument is passed as the argument to the entry point when
+ * entering the coroutine for the first time. It is subsequently ignored.
+ */
+void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
+
+/**
+ * Transfer control back to a coroutine's caller
+ *
+ * This function does not return until the coroutine is re-entered using
+ * qemu_coroutine_enter().
+ */
+void coroutine_fn qemu_coroutine_yield(void);
+
+/**
+ * Get the currently executing coroutine
+ */
+Coroutine *coroutine_fn qemu_coroutine_self(void);
+
+/**
+ * Return whether or not currently inside a coroutine
+ *
+ * This can be used to write functions that work both when in coroutine context
+ * and when not in coroutine context. Note that such functions cannot use the
+ * coroutine_fn annotation since they work outside coroutine context.
+ */
+bool qemu_in_coroutine(void);
+
+
+
+/**
+ * CoQueues are a mechanism to queue coroutines in order to continue executing
+ * them later. They provide the fundamental primitives on which coroutine locks
+ * are built.
+ */
+typedef struct CoQueue {
+ QTAILQ_HEAD(, Coroutine) entries;
+ AioContext *ctx;
+} CoQueue;
+
+/**
+ * Initialise a CoQueue. This must be called before any other operation is used
+ * on the CoQueue.
+ */
+void qemu_co_queue_init(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
+
+/**
+ * Adds the current coroutine to the head of the CoQueue and transfers control to the
+ * caller of the coroutine.
+ */
+void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue);
+
+/**
+ * Restarts the next coroutine in the CoQueue and removes it from the queue.
+ *
+ * Returns true if a coroutine was restarted, false if the queue is empty.
+ */
+bool qemu_co_queue_next(CoQueue *queue);
+
+/**
+ * Restarts all coroutines in the CoQueue and leaves the queue empty.
+ */
+void qemu_co_queue_restart_all(CoQueue *queue);
+
+/**
+ * Checks if the CoQueue is empty.
+ */
+bool qemu_co_queue_empty(CoQueue *queue);
+
+
+/**
+ * Provides a mutex that can be used to synchronise coroutines
+ */
+typedef struct CoMutex {
+ bool locked;
+ CoQueue queue;
+} CoMutex;
+
+/**
+ * Initialises a CoMutex. This must be called before any other operation is used
+ * on the CoMutex.
+ */
+void qemu_co_mutex_init(CoMutex *mutex);
+
+/**
+ * Locks the mutex. If the lock cannot be taken immediately, control is
+ * transferred to the caller of the current coroutine.
+ */
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
+
+/**
+ * Unlocks the mutex and schedules the next coroutine that was waiting for this
+ * lock to be run.
+ */
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
+
+typedef struct CoRwlock {
+ bool writer;
+ int reader;
+ CoQueue queue;
+} CoRwlock;
+
+/**
+ * Initialises a CoRwlock. This must be called before any other operation
+ * is used on the CoRwlock.
+ */
+void qemu_co_rwlock_init(CoRwlock *lock);
+
+/**
+ * Read locks the CoRwlock. If the lock cannot be taken immediately because
+ * of a parallel writer, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_rdlock(CoRwlock *lock);
+
+/**
+ * Write locks the CoRwlock. If the lock cannot be taken immediately because
+ * of a parallel reader, control is transferred to the caller of the current
+ * coroutine.
+ */
+void qemu_co_rwlock_wrlock(CoRwlock *lock);
+
+/**
+ * Unlocks the read/write lock and schedules the next coroutine that was
+ * waiting for this lock to be run.
+ */
+void qemu_co_rwlock_unlock(CoRwlock *lock);
+
+/**
+ * Yield the coroutine for a given duration
+ *
+ * Note this function uses timers and hence only works when a main loop is in
+ * use. See main-loop.h and do not use from qemu-tool programs.
+ */
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns);
+
+/**
+ * Yield until a file descriptor becomes readable
+ *
+ * Note that this function clobbers the handlers for the file descriptor.
+ */
+void coroutine_fn yield_until_fd_readable(int fd);
+#endif /* QEMU_COROUTINE_H */
diff --git a/contrib/qemu/include/block/coroutine_int.h b/contrib/qemu/include/block/coroutine_int.h
new file mode 100644
index 000000000..f133d65af
--- /dev/null
+++ b/contrib/qemu/include/block/coroutine_int.h
@@ -0,0 +1,53 @@
+/*
+ * Coroutine internals
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_COROUTINE_INT_H
+#define QEMU_COROUTINE_INT_H
+
+#include "qemu/queue.h"
+#include "block/coroutine.h"
+
+typedef enum {
+ COROUTINE_YIELD = 1,
+ COROUTINE_TERMINATE = 2,
+} CoroutineAction;
+
+struct Coroutine {
+ CoroutineEntry *entry;
+ void *entry_arg;
+ Coroutine *caller;
+ QSLIST_ENTRY(Coroutine) pool_next;
+
+ /* Coroutines that should be woken up when we yield or terminate */
+ QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
+ QTAILQ_ENTRY(Coroutine) co_queue_next;
+};
+
+Coroutine *qemu_coroutine_new(void);
+void qemu_coroutine_delete(Coroutine *co);
+CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
+ CoroutineAction action);
+void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
+
+#endif
diff --git a/contrib/qemu/include/block/snapshot.h b/contrib/qemu/include/block/snapshot.h
new file mode 100644
index 000000000..eaf61f032
--- /dev/null
+++ b/contrib/qemu/include/block/snapshot.h
@@ -0,0 +1,53 @@
+/*
+ * Block layer snapshot related functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef SNAPSHOT_H
+#define SNAPSHOT_H
+
+#include "qemu-common.h"
+
+typedef struct QEMUSnapshotInfo {
+ char id_str[128]; /* unique snapshot id */
+ /* the following fields are informative. They are not needed for
+ the consistency of the snapshot */
+ char name[256]; /* user chosen name */
+ uint64_t vm_state_size; /* VM state info size */
+ uint32_t date_sec; /* UTC date of the snapshot */
+ uint32_t date_nsec;
+ uint64_t vm_clock_nsec; /* VM clock relative to boot */
+} QEMUSnapshotInfo;
+
+int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
+ const char *name);
+int bdrv_can_snapshot(BlockDriverState *bs);
+int bdrv_snapshot_create(BlockDriverState *bs,
+ QEMUSnapshotInfo *sn_info);
+int bdrv_snapshot_goto(BlockDriverState *bs,
+ const char *snapshot_id);
+int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id);
+int bdrv_snapshot_list(BlockDriverState *bs,
+ QEMUSnapshotInfo **psn_info);
+int bdrv_snapshot_load_tmp(BlockDriverState *bs,
+ const char *snapshot_name);
+#endif
diff --git a/contrib/qemu/include/config.h b/contrib/qemu/include/config.h
new file mode 100644
index 000000000..e20f78696
--- /dev/null
+++ b/contrib/qemu/include/config.h
@@ -0,0 +1,2 @@
+#include "config-host.h"
+#include "config-target.h"
diff --git a/contrib/qemu/include/exec/cpu-common.h b/contrib/qemu/include/exec/cpu-common.h
new file mode 100644
index 000000000..e4996e19c
--- /dev/null
+++ b/contrib/qemu/include/exec/cpu-common.h
@@ -0,0 +1,124 @@
+#ifndef CPU_COMMON_H
+#define CPU_COMMON_H 1
+
+/* CPU interfaces that are target independent. */
+
+#ifndef CONFIG_USER_ONLY
+#include "exec/hwaddr.h"
+#endif
+
+#ifndef NEED_CPU_H
+#include "exec/poison.h"
+#endif
+
+#include "qemu/bswap.h"
+#include "qemu/queue.h"
+
+/**
+ * CPUListState:
+ * @cpu_fprintf: Print function.
+ * @file: File to print to using @cpu_fprintf.
+ *
+ * State commonly used for iterating over CPU models.
+ */
+typedef struct CPUListState {
+ fprintf_function cpu_fprintf;
+ FILE *file;
+} CPUListState;
+
+#if !defined(CONFIG_USER_ONLY)
+
+enum device_endian {
+ DEVICE_NATIVE_ENDIAN,
+ DEVICE_BIG_ENDIAN,
+ DEVICE_LITTLE_ENDIAN,
+};
+
+/* address in the RAM (different from a physical address) */
+#if defined(CONFIG_XEN_BACKEND)
+typedef uint64_t ram_addr_t;
+# define RAM_ADDR_MAX UINT64_MAX
+# define RAM_ADDR_FMT "%" PRIx64
+#else
+typedef uintptr_t ram_addr_t;
+# define RAM_ADDR_MAX UINTPTR_MAX
+# define RAM_ADDR_FMT "%" PRIxPTR
+#endif
+
+/* memory API */
+
+typedef void CPUWriteMemoryFunc(void *opaque, hwaddr addr, uint32_t value);
+typedef uint32_t CPUReadMemoryFunc(void *opaque, hwaddr addr);
+
+void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
+/* This should not be used by devices. */
+MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr);
+void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev);
+
+void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
+ int len, int is_write);
+static inline void cpu_physical_memory_read(hwaddr addr,
+ void *buf, int len)
+{
+ cpu_physical_memory_rw(addr, buf, len, 0);
+}
+static inline void cpu_physical_memory_write(hwaddr addr,
+ const void *buf, int len)
+{
+ cpu_physical_memory_rw(addr, (void *)buf, len, 1);
+}
+void *cpu_physical_memory_map(hwaddr addr,
+ hwaddr *plen,
+ int is_write);
+void cpu_physical_memory_unmap(void *buffer, hwaddr len,
+ int is_write, hwaddr access_len);
+void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
+
+bool cpu_physical_memory_is_io(hwaddr phys_addr);
+
+/* Coalesced MMIO regions are areas where write operations can be reordered.
+ * This usually implies that write operations are side-effect free. This allows
+ * batching which can make a major impact on performance when using
+ * virtualization.
+ */
+void qemu_flush_coalesced_mmio_buffer(void);
+
+uint32_t ldub_phys(hwaddr addr);
+uint32_t lduw_le_phys(hwaddr addr);
+uint32_t lduw_be_phys(hwaddr addr);
+uint32_t ldl_le_phys(hwaddr addr);
+uint32_t ldl_be_phys(hwaddr addr);
+uint64_t ldq_le_phys(hwaddr addr);
+uint64_t ldq_be_phys(hwaddr addr);
+void stb_phys(hwaddr addr, uint32_t val);
+void stw_le_phys(hwaddr addr, uint32_t val);
+void stw_be_phys(hwaddr addr, uint32_t val);
+void stl_le_phys(hwaddr addr, uint32_t val);
+void stl_be_phys(hwaddr addr, uint32_t val);
+void stq_le_phys(hwaddr addr, uint64_t val);
+void stq_be_phys(hwaddr addr, uint64_t val);
+
+#ifdef NEED_CPU_H
+uint32_t lduw_phys(hwaddr addr);
+uint32_t ldl_phys(hwaddr addr);
+uint64_t ldq_phys(hwaddr addr);
+void stl_phys_notdirty(hwaddr addr, uint32_t val);
+void stw_phys(hwaddr addr, uint32_t val);
+void stl_phys(hwaddr addr, uint32_t val);
+void stq_phys(hwaddr addr, uint64_t val);
+#endif
+
+void cpu_physical_memory_write_rom(hwaddr addr,
+ const uint8_t *buf, int len);
+
+extern struct MemoryRegion io_mem_rom;
+extern struct MemoryRegion io_mem_notdirty;
+
+typedef void (RAMBlockIterFunc)(void *host_addr,
+ ram_addr_t offset, ram_addr_t length, void *opaque);
+
+void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
+
+#endif
+
+#endif /* !CPU_COMMON_H */
diff --git a/contrib/qemu/include/exec/hwaddr.h b/contrib/qemu/include/exec/hwaddr.h
new file mode 100644
index 000000000..c9eb78fba
--- /dev/null
+++ b/contrib/qemu/include/exec/hwaddr.h
@@ -0,0 +1,20 @@
+/* Define hwaddr and its associated limit and format macros. */
+
+#ifndef HWADDR_H
+#define HWADDR_H
+
+#define HWADDR_BITS 64
+/* hwaddr is the type of a physical address (its size can
+ be different from 'target_ulong'). */
+
+typedef uint64_t hwaddr;
+#define HWADDR_MAX UINT64_MAX
+#define TARGET_FMT_plx "%016" PRIx64
+#define HWADDR_PRId PRId64
+#define HWADDR_PRIi PRIi64
+#define HWADDR_PRIo PRIo64
+#define HWADDR_PRIu PRIu64
+#define HWADDR_PRIx PRIx64
+#define HWADDR_PRIX PRIX64
+
+#endif
diff --git a/contrib/qemu/include/exec/poison.h b/contrib/qemu/include/exec/poison.h
new file mode 100644
index 000000000..2341a7504
--- /dev/null
+++ b/contrib/qemu/include/exec/poison.h
@@ -0,0 +1,63 @@
+/* Poison identifiers that should not be used when building
+ target independent device code. */
+
+#ifndef HW_POISON_H
+#define HW_POISON_H
+#ifdef __GNUC__
+
+#pragma GCC poison TARGET_I386
+#pragma GCC poison TARGET_X86_64
+#pragma GCC poison TARGET_ALPHA
+#pragma GCC poison TARGET_ARM
+#pragma GCC poison TARGET_CRIS
+#pragma GCC poison TARGET_LM32
+#pragma GCC poison TARGET_M68K
+#pragma GCC poison TARGET_MIPS
+#pragma GCC poison TARGET_MIPS64
+#pragma GCC poison TARGET_OPENRISC
+#pragma GCC poison TARGET_PPC
+#pragma GCC poison TARGET_PPCEMB
+#pragma GCC poison TARGET_PPC64
+#pragma GCC poison TARGET_ABI32
+#pragma GCC poison TARGET_SH4
+#pragma GCC poison TARGET_SPARC
+#pragma GCC poison TARGET_SPARC64
+
+#pragma GCC poison TARGET_WORDS_BIGENDIAN
+#pragma GCC poison BSWAP_NEEDED
+
+#pragma GCC poison TARGET_LONG_BITS
+#pragma GCC poison TARGET_FMT_lx
+#pragma GCC poison TARGET_FMT_ld
+
+#pragma GCC poison TARGET_PAGE_SIZE
+#pragma GCC poison TARGET_PAGE_MASK
+#pragma GCC poison TARGET_PAGE_BITS
+#pragma GCC poison TARGET_PAGE_ALIGN
+
+#pragma GCC poison CPUArchState
+#pragma GCC poison env
+
+#pragma GCC poison lduw_phys
+#pragma GCC poison ldl_phys
+#pragma GCC poison ldq_phys
+#pragma GCC poison stl_phys_notdirty
+#pragma GCC poison stw_phys
+#pragma GCC poison stl_phys
+#pragma GCC poison stq_phys
+
+#pragma GCC poison CPU_INTERRUPT_HARD
+#pragma GCC poison CPU_INTERRUPT_EXITTB
+#pragma GCC poison CPU_INTERRUPT_HALT
+#pragma GCC poison CPU_INTERRUPT_DEBUG
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_2
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_3
+#pragma GCC poison CPU_INTERRUPT_TGT_EXT_4
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_0
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_1
+#pragma GCC poison CPU_INTERRUPT_TGT_INT_2
+
+#endif
+#endif
diff --git a/contrib/qemu/include/fpu/softfloat.h b/contrib/qemu/include/fpu/softfloat.h
new file mode 100644
index 000000000..f3927e241
--- /dev/null
+++ b/contrib/qemu/include/fpu/softfloat.h
@@ -0,0 +1,641 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
+#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
+#include <sunmath.h>
+#endif
+
+#include <inttypes.h>
+#include "config-host.h"
+#include "qemu/osdep.h"
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified. For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+typedef uint8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef unsigned int uint32;
+typedef signed int int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+#define LIT64( a ) a##LL
+#define INLINE static inline
+
+#define STATUS_PARAM , float_status *status
+#define STATUS(field) status->field
+#define STATUS_VAR , status
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+*----------------------------------------------------------------------------*/
+enum {
+ float_relation_less = -1,
+ float_relation_equal = 0,
+ float_relation_greater = 1,
+ float_relation_unordered = 2
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+/* Use structures for soft-float types. This prevents accidentally mixing
+ them with native int/float types. A sufficiently clever compiler and
+ sane ABI should be able to see through these structs. However
+ x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
+//#define USE_SOFTFLOAT_STRUCT_TYPES
+#ifdef USE_SOFTFLOAT_STRUCT_TYPES
+typedef struct {
+ uint16_t v;
+} float16;
+#define float16_val(x) (((float16)(x)).v)
+#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
+#define const_float16(x) { x }
+typedef struct {
+ uint32_t v;
+} float32;
+/* The cast ensures an error if the wrong type is passed. */
+#define float32_val(x) (((float32)(x)).v)
+#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
+#define const_float32(x) { x }
+typedef struct {
+ uint64_t v;
+} float64;
+#define float64_val(x) (((float64)(x)).v)
+#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
+#define const_float64(x) { x }
+#else
+typedef uint16_t float16;
+typedef uint32_t float32;
+typedef uint64_t float64;
+#define float16_val(x) (x)
+#define float32_val(x) (x)
+#define float64_val(x) (x)
+#define make_float16(x) (x)
+#define make_float32(x) (x)
+#define make_float64(x) (x)
+#define const_float16(x) (x)
+#define const_float32(x) (x)
+#define const_float64(x) (x)
+#endif
+typedef struct {
+ uint64_t low;
+ uint16_t high;
+} floatx80;
+#define make_floatx80(exp, mant) ((floatx80) { mant, exp })
+#define make_floatx80_init(exp, mant) { .low = mant, .high = exp }
+typedef struct {
+#ifdef HOST_WORDS_BIGENDIAN
+ uint64_t high, low;
+#else
+ uint64_t low, high;
+#endif
+} float128;
+#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
+#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_tininess_after_rounding = 0,
+ float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_round_nearest_even = 0,
+ float_round_down = 1,
+ float_round_up = 2,
+ float_round_to_zero = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+enum {
+ float_flag_invalid = 1,
+ float_flag_divbyzero = 4,
+ float_flag_overflow = 8,
+ float_flag_underflow = 16,
+ float_flag_inexact = 32,
+ float_flag_input_denormal = 64,
+ float_flag_output_denormal = 128
+};
+
+typedef struct float_status {
+    signed char float_detect_tininess;
+    signed char float_rounding_mode;
+    uint8_t float_exception_flags; /* unsigned: float_flag_output_denormal is 128 */
+    signed char floatx80_rounding_precision;
+    /* should denormalised results go to zero and set the inexact flag? */
+    flag flush_to_zero;
+    /* should denormalised inputs go to zero and set the input_denormal flag? */
+    flag flush_inputs_to_zero;
+    flag default_nan_mode;
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+void set_float_exception_flags(int val STATUS_PARAM);
+INLINE void set_float_detect_tininess(int val STATUS_PARAM)
+{
+    status->float_detect_tininess = val; /* record tininess-detection mode */
+}
+INLINE void set_flush_to_zero(flag val STATUS_PARAM)
+{
+    status->flush_to_zero = val; /* store the flush-to-zero switch */
+}
+INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM)
+{
+    status->flush_inputs_to_zero = val; /* store the input-flush switch */
+}
+INLINE void set_default_nan_mode(flag val STATUS_PARAM)
+{
+    status->default_nan_mode = val; /* store the default-NaN switch */
+}
+INLINE int get_float_exception_flags(float_status *status)
+{
+    return status->float_exception_flags; /* accumulated float_flag_* bits */
+}
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 flags STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Options to indicate which negations to perform in float*_muladd()
+| Using these differs from negating an input or output before calling
+| the muladd function in that this means that a NaN doesn't have its
+| sign bit inverted before it is propagated.
+*----------------------------------------------------------------------------*/
+enum {
+ float_muladd_negate_c = 1,
+ float_muladd_negate_product = 2,
+ float_muladd_negate_result = 4,
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 STATUS_PARAM );
+float64 int32_to_float64( int32 STATUS_PARAM );
+float32 uint32_to_float32( uint32 STATUS_PARAM );
+float64 uint32_to_float64( uint32 STATUS_PARAM );
+floatx80 int32_to_floatx80( int32 STATUS_PARAM );
+float128 int32_to_float128( int32 STATUS_PARAM );
+float32 int64_to_float32( int64 STATUS_PARAM );
+float32 uint64_to_float32( uint64 STATUS_PARAM );
+float64 int64_to_float64( int64 STATUS_PARAM );
+float64 uint64_to_float64( uint64 STATUS_PARAM );
+floatx80 int64_to_floatx80( int64 STATUS_PARAM );
+float128 int64_to_float128( int64 STATUS_PARAM );
+float128 uint64_to_float128( uint64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision conversion routines.
+*----------------------------------------------------------------------------*/
+float16 float32_to_float16( float32, flag STATUS_PARAM );
+float32 float16_to_float32( float16, flag STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision operations.
+*----------------------------------------------------------------------------*/
+int float16_is_quiet_nan( float16 );
+int float16_is_signaling_nan( float16 );
+float16 float16_maybe_silence_nan( float16 );
+
+INLINE int float16_is_any_nan(float16 a)
+{
+    return (float16_val(a) & 0x7fff) > 0x7c00; /* sign masked off, above the 0x7c00 bound */
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float16 float16_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
+uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
+int32 float32_to_int32( float32 STATUS_PARAM );
+int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
+uint32 float32_to_uint32( float32 STATUS_PARAM );
+uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
+int64 float32_to_int64( float32 STATUS_PARAM );
+int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
+float64 float32_to_float64( float32 STATUS_PARAM );
+floatx80 float32_to_floatx80( float32 STATUS_PARAM );
+float128 float32_to_float128( float32 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 STATUS_PARAM );
+float32 float32_add( float32, float32 STATUS_PARAM );
+float32 float32_sub( float32, float32 STATUS_PARAM );
+float32 float32_mul( float32, float32 STATUS_PARAM );
+float32 float32_div( float32, float32 STATUS_PARAM );
+float32 float32_rem( float32, float32 STATUS_PARAM );
+float32 float32_muladd(float32, float32, float32, int STATUS_PARAM);
+float32 float32_sqrt( float32 STATUS_PARAM );
+float32 float32_exp2( float32 STATUS_PARAM );
+float32 float32_log2( float32 STATUS_PARAM );
+int float32_eq( float32, float32 STATUS_PARAM );
+int float32_le( float32, float32 STATUS_PARAM );
+int float32_lt( float32, float32 STATUS_PARAM );
+int float32_unordered( float32, float32 STATUS_PARAM );
+int float32_eq_quiet( float32, float32 STATUS_PARAM );
+int float32_le_quiet( float32, float32 STATUS_PARAM );
+int float32_lt_quiet( float32, float32 STATUS_PARAM );
+int float32_unordered_quiet( float32, float32 STATUS_PARAM );
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+float32 float32_min(float32, float32 STATUS_PARAM);
+float32 float32_max(float32, float32 STATUS_PARAM);
+int float32_is_quiet_nan( float32 );
+int float32_is_signaling_nan( float32 );
+float32 float32_maybe_silence_nan( float32 );
+float32 float32_scalbn( float32, int STATUS_PARAM );
+
+INLINE float32 float32_abs(float32 a)
+{
+    /* Only the sign bit is cleared: NaNs get no special handling and
+     * denormal inputs are not flushed to zero. */
+    uint32_t magnitude = float32_val(a) & 0x7fffffff;
+    return make_float32(magnitude);
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+    /* Only the sign bit is toggled: NaNs get no special handling and
+     * denormal inputs are not flushed to zero. */
+    uint32_t flipped = float32_val(a) ^ 0x80000000;
+    return make_float32(flipped);
+}
+
+INLINE int float32_is_infinity(float32 a)
+{
+    return float32_val(float32_abs(a)) == 0x7f800000; /* sign ignored */
+}
+
+INLINE int float32_is_neg(float32 a)
+{
+    return (float32_val(a) & 0x80000000) != 0; /* 1 when the sign bit is set */
+}
+
+INLINE int float32_is_zero(float32 a)
+{
+    return float32_val(float32_abs(a)) == 0; /* matches +0 and -0 */
+}
+
+INLINE int float32_is_any_nan(float32 a)
+{
+    return (float32_val(a) & 0x7fffffff) > 0x7f800000U; /* literal mask: 1 << 31 on int is UB */
+}
+
+INLINE int float32_is_zero_or_denormal(float32 a)
+{
+    return !(float32_val(a) & 0x7f800000); /* exponent field all zero */
+}
+
+INLINE float32 float32_set_sign(float32 a, int sign)
+{
+    return make_float32((float32_val(a) & 0x7fffffff) | ((uint32_t)sign << 31)); /* unsigned shift avoids UB */
+}
+
+#define float32_zero make_float32(0)
+#define float32_one make_float32(0x3f800000)
+#define float32_ln2 make_float32(0x3f317218)
+#define float32_pi make_float32(0x40490fdb)
+#define float32_half make_float32(0x3f000000)
+#define float32_infinity make_float32(0x7f800000)
+
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float32 float32_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
+uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
+int32 float64_to_int32( float64 STATUS_PARAM );
+int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+uint32 float64_to_uint32( float64 STATUS_PARAM );
+uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64 float64_to_int64( float64 STATUS_PARAM );
+int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+uint64 float64_to_uint64 (float64 a STATUS_PARAM);
+uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
+float32 float64_to_float32( float64 STATUS_PARAM );
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+float128 float64_to_float128( float64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+float64 float64_add( float64, float64 STATUS_PARAM );
+float64 float64_sub( float64, float64 STATUS_PARAM );
+float64 float64_mul( float64, float64 STATUS_PARAM );
+float64 float64_div( float64, float64 STATUS_PARAM );
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_muladd(float64, float64, float64, int STATUS_PARAM);
+float64 float64_sqrt( float64 STATUS_PARAM );
+float64 float64_log2( float64 STATUS_PARAM );
+int float64_eq( float64, float64 STATUS_PARAM );
+int float64_le( float64, float64 STATUS_PARAM );
+int float64_lt( float64, float64 STATUS_PARAM );
+int float64_unordered( float64, float64 STATUS_PARAM );
+int float64_eq_quiet( float64, float64 STATUS_PARAM );
+int float64_le_quiet( float64, float64 STATUS_PARAM );
+int float64_lt_quiet( float64, float64 STATUS_PARAM );
+int float64_unordered_quiet( float64, float64 STATUS_PARAM );
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+float64 float64_min(float64, float64 STATUS_PARAM);
+float64 float64_max(float64, float64 STATUS_PARAM);
+int float64_is_quiet_nan( float64 a );
+int float64_is_signaling_nan( float64 );
+float64 float64_maybe_silence_nan( float64 );
+float64 float64_scalbn( float64, int STATUS_PARAM );
+
+INLINE float64 float64_abs(float64 a)
+{
+    /* Only the sign bit is cleared: NaNs get no special handling and
+     * denormal inputs are not flushed to zero. */
+    uint64_t magnitude = float64_val(a) & 0x7fffffffffffffffULL;
+    return make_float64(magnitude);
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+    /* Only the sign bit is toggled: NaNs get no special handling and
+     * denormal inputs are not flushed to zero. */
+    uint64_t flipped = float64_val(a) ^ 0x8000000000000000ULL;
+    return make_float64(flipped);
+}
+
+INLINE int float64_is_infinity(float64 a)
+{
+    return float64_val(float64_abs(a)) == 0x7ff0000000000000ULL; /* sign ignored */
+}
+
+INLINE int float64_is_neg(float64 a)
+{
+    return (float64_val(a) & 0x8000000000000000ULL) != 0; /* 1 when the sign bit is set */
+}
+
+INLINE int float64_is_zero(float64 a)
+{
+    return float64_val(float64_abs(a)) == 0; /* matches +0 and -0 */
+}
+
+INLINE int float64_is_any_nan(float64 a)
+{
+    return (float64_val(a) & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL; /* sign masked off */
+}
+
+INLINE int float64_is_zero_or_denormal(float64 a)
+{
+    return !(float64_val(a) & 0x7ff0000000000000ULL); /* exponent field all zero */
+}
+
+INLINE float64 float64_set_sign(float64 a, int sign)
+{
+    return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
+                        | ((uint64_t)sign << 63)); /* unsigned shift: int64_t << 63 is UB */
+}
+
+#define float64_zero make_float64(0)
+#define float64_one make_float64(0x3ff0000000000000LL)
+#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+#define float64_pi make_float64(0x400921fb54442d18LL)
+#define float64_half make_float64(0x3fe0000000000000LL)
+#define float64_infinity make_float64(0x7ff0000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float64 float64_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 STATUS_PARAM );
+int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered( floatx80, floatx80 STATUS_PARAM );
+int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_quiet_nan( floatx80 );
+int floatx80_is_signaling_nan( floatx80 );
+floatx80 floatx80_maybe_silence_nan( floatx80 );
+floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+    /* Same value with bit 15 of the high word (the sign) cleared. */
+    return make_floatx80(a.high & 0x7fff, a.low);
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+    /* Same value with bit 15 of the high word (the sign) toggled. */
+    return make_floatx80(a.high ^ 0x8000, a.low);
+}
+
+INLINE int floatx80_is_infinity(floatx80 a)
+{
+    return a.low == 0x8000000000000000ULL && (a.high & 0x7fff) == 0x7fff; /* sign ignored */
+}
+
+INLINE int floatx80_is_neg(floatx80 a)
+{
+    return (a.high & 0x8000) != 0; /* 1 when the sign bit is set */
+}
+
+INLINE int floatx80_is_zero(floatx80 a)
+{
+    return !(a.high & 0x7fff) && !a.low; /* sign ignored */
+}
+
+INLINE int floatx80_is_zero_or_denormal(floatx80 a)
+{
+    return !(a.high & 0x7fff); /* low 15 bits of high all zero */
+}
+
+INLINE int floatx80_is_any_nan(floatx80 a)
+{
+    return (a.high & 0x7fff) == 0x7fff && (a.low & 0x7fffffffffffffffULL) != 0; /* top bit of low ignored */
+}
+
+#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
+#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
+#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
+#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
+#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
+#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const floatx80 floatx80_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 STATUS_PARAM );
+int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM );
+int64 float128_to_int64( float128 STATUS_PARAM );
+int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM );
+float32 float128_to_float32( float128 STATUS_PARAM );
+float64 float128_to_float64( float128 STATUS_PARAM );
+floatx80 float128_to_floatx80( float128 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 STATUS_PARAM );
+float128 float128_add( float128, float128 STATUS_PARAM );
+float128 float128_sub( float128, float128 STATUS_PARAM );
+float128 float128_mul( float128, float128 STATUS_PARAM );
+float128 float128_div( float128, float128 STATUS_PARAM );
+float128 float128_rem( float128, float128 STATUS_PARAM );
+float128 float128_sqrt( float128 STATUS_PARAM );
+int float128_eq( float128, float128 STATUS_PARAM );
+int float128_le( float128, float128 STATUS_PARAM );
+int float128_lt( float128, float128 STATUS_PARAM );
+int float128_unordered( float128, float128 STATUS_PARAM );
+int float128_eq_quiet( float128, float128 STATUS_PARAM );
+int float128_le_quiet( float128, float128 STATUS_PARAM );
+int float128_lt_quiet( float128, float128 STATUS_PARAM );
+int float128_unordered_quiet( float128, float128 STATUS_PARAM );
+int float128_compare( float128, float128 STATUS_PARAM );
+int float128_compare_quiet( float128, float128 STATUS_PARAM );
+int float128_is_quiet_nan( float128 );
+int float128_is_signaling_nan( float128 );
+float128 float128_maybe_silence_nan( float128 );
+float128 float128_scalbn( float128, int STATUS_PARAM );
+
+INLINE float128 float128_abs(float128 a)
+{
+    /* Same value with the top bit of the high word (the sign) cleared. */
+    return make_float128(a.high & 0x7fffffffffffffffULL, a.low);
+}
+
+INLINE float128 float128_chs(float128 a)
+{
+    /* Same value with the top bit of the high word (the sign) toggled. */
+    return make_float128(a.high ^ 0x8000000000000000ULL, a.low);
+}
+
+INLINE int float128_is_infinity(float128 a)
+{
+    return !a.low && (a.high & 0x7fffffffffffffffULL) == 0x7fff000000000000ULL; /* sign ignored */
+}
+
+INLINE int float128_is_neg(float128 a)
+{
+    return (a.high & 0x8000000000000000ULL) != 0; /* 1 when the sign bit is set */
+}
+
+INLINE int float128_is_zero(float128 a)
+{
+    return !(a.high & 0x7fffffffffffffffULL) && !a.low; /* sign ignored */
+}
+
+INLINE int float128_is_zero_or_denormal(float128 a)
+{
+    return !(a.high & 0x7fff000000000000ULL); /* bits 48..62 of high all zero */
+}
+
+INLINE int float128_is_any_nan(float128 a)
+{
+    const uint64_t exp_mask = 0x7fff000000000000ULL; /* bits 48..62 of high */
+    return (a.high & exp_mask) == exp_mask && (a.low || (a.high & 0xffffffffffffULL));
+}
+
+#define float128_zero make_float128(0, 0)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* !SOFTFLOAT_H */
diff --git a/contrib/qemu/include/glib-compat.h b/contrib/qemu/include/glib-compat.h
new file mode 100644
index 000000000..8aa77afd6
--- /dev/null
+++ b/contrib/qemu/include/glib-compat.h
@@ -0,0 +1,27 @@
+/*
+ * GLIB Compatibility Functions
+ *
+ * Copyright IBM, Corp. 2013
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_GLIB_COMPAT_H
+#define QEMU_GLIB_COMPAT_H
+
+#include <glib.h>
+
+#if !GLIB_CHECK_VERSION(2, 14, 0)
+static inline guint
+g_timeout_add_seconds(guint interval, GSourceFunc function, gpointer data)
+{
+    return g_timeout_add(1000 * interval, function, data); /* seconds scaled by 1000 */
+}
+#endif
+
+#endif
diff --git a/contrib/qemu/include/migration/migration.h b/contrib/qemu/include/migration/migration.h
new file mode 100644
index 000000000..bc9fde0b2
--- /dev/null
+++ b/contrib/qemu/include/migration/migration.h
@@ -0,0 +1,157 @@
+/*
+ * QEMU live migration
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_MIGRATION_H
+#define QEMU_MIGRATION_H
+
+#include "qapi/qmp/qdict.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/notify.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "qapi-types.h"
+#include "exec/cpu-common.h"
+
+struct MigrationParams {
+ bool blk;
+ bool shared;
+};
+
+typedef struct MigrationState MigrationState;
+
+struct MigrationState
+{
+ int64_t bandwidth_limit;
+ size_t bytes_xfer;
+ size_t xfer_limit;
+ QemuThread thread;
+ QEMUBH *cleanup_bh;
+ QEMUFile *file;
+
+ int state;
+ MigrationParams params;
+ double mbps;
+ int64_t total_time;
+ int64_t downtime;
+ int64_t expected_downtime;
+ int64_t dirty_pages_rate;
+ int64_t dirty_bytes_rate;
+ bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
+ int64_t xbzrle_cache_size;
+};
+
+void process_incoming_migration(QEMUFile *f);
+
+void qemu_start_incoming_migration(const char *uri, Error **errp);
+
+uint64_t migrate_max_downtime(void);
+
+void do_info_migrate_print(Monitor *mon, const QObject *data);
+
+void do_info_migrate(Monitor *mon, QObject **ret_data);
+
+void exec_start_incoming_migration(const char *host_port, Error **errp);
+
+void exec_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+
+void tcp_start_incoming_migration(const char *host_port, Error **errp);
+
+void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
+
+void unix_start_incoming_migration(const char *path, Error **errp);
+
+void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
+
+void fd_start_incoming_migration(const char *path, Error **errp);
+
+void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
+
+void migrate_fd_error(MigrationState *s);
+
+void migrate_fd_connect(MigrationState *s);
+
+int migrate_fd_close(MigrationState *s);
+
+void add_migration_state_change_notifier(Notifier *notify);
+void remove_migration_state_change_notifier(Notifier *notify);
+bool migration_is_active(MigrationState *);
+bool migration_has_finished(MigrationState *);
+bool migration_has_failed(MigrationState *);
+MigrationState *migrate_get_current(void);
+
+uint64_t ram_bytes_remaining(void);
+uint64_t ram_bytes_transferred(void);
+uint64_t ram_bytes_total(void);
+
+void acct_update_position(QEMUFile *f, size_t size, bool zero);
+
+extern SaveVMHandlers savevm_ram_handlers;
+
+uint64_t dup_mig_bytes_transferred(void);
+uint64_t dup_mig_pages_transferred(void);
+uint64_t skipped_mig_bytes_transferred(void);
+uint64_t skipped_mig_pages_transferred(void);
+uint64_t norm_mig_bytes_transferred(void);
+uint64_t norm_mig_pages_transferred(void);
+uint64_t xbzrle_mig_bytes_transferred(void);
+uint64_t xbzrle_mig_pages_transferred(void);
+uint64_t xbzrle_mig_pages_overflow(void);
+uint64_t xbzrle_mig_pages_cache_miss(void);
+
+/**
+ * @migrate_add_blocker - prevent migration from proceeding
+ *
+ * @reason - an error to be returned whenever migration is attempted
+ */
+void migrate_add_blocker(Error *reason);
+
+/**
+ * @migrate_del_blocker - remove a blocking error from migration
+ *
+ * @reason - the error blocking migration
+ */
+void migrate_del_blocker(Error *reason);
+
+bool migrate_rdma_pin_all(void);
+
+bool migrate_auto_converge(void);
+
+int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
+ uint8_t *dst, int dlen);
+int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
+
+int migrate_use_xbzrle(void);
+int64_t migrate_xbzrle_cache_size(void);
+
+int64_t xbzrle_cache_resize(int64_t new_size);
+
+void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
+void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
+void ram_control_load_hook(QEMUFile *f, uint64_t flags);
+
+/* Whenever this is found in the data stream, the flags
+ * will be passed to ram_control_load_hook in the incoming-migration
+ * side. This lets before_ram_iterate/after_ram_iterate add
+ * transport-specific sections to the RAM migration data.
+ */
+#define RAM_SAVE_FLAG_HOOK 0x80
+
+#define RAM_SAVE_CONTROL_NOT_SUPP -1000
+#define RAM_SAVE_CONTROL_DELAYED -2000
+
+size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset,
+ ram_addr_t offset, size_t size,
+ int *bytes_sent);
+
+#endif
diff --git a/contrib/qemu/include/migration/qemu-file.h b/contrib/qemu/include/migration/qemu-file.h
new file mode 100644
index 000000000..0f757fbeb
--- /dev/null
+++ b/contrib/qemu/include/migration/qemu-file.h
@@ -0,0 +1,266 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_FILE_H
+#define QEMU_FILE_H 1
+#include "exec/cpu-common.h"
+
+/* This function writes a chunk of data to a file at the given position.
+ * The pos argument can be ignored if the file is only being used for
+ * streaming. The handler should try to write all of the data it can.
+ */
+typedef int (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf,
+ int64_t pos, int size);
+
+/* Read a chunk of data from a file at the given position. The pos argument
+ * can be ignored if the file is only being used for streaming. The number of
+ * bytes actually read should be returned.
+ */
+typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf,
+ int64_t pos, int size);
+
+/* Close a file
+ *
+ * Return negative error number on error, 0 or positive value on success.
+ *
+ * The meaning of return value on success depends on the specific back-end being
+ * used.
+ */
+typedef int (QEMUFileCloseFunc)(void *opaque);
+
+/* Called to return the OS file descriptor associated with the QEMUFile.
+ */
+typedef int (QEMUFileGetFD)(void *opaque);
+
+/*
+ * This function writes an iovec to file.
+ */
+typedef ssize_t (QEMUFileWritevBufferFunc)(void *opaque, struct iovec *iov,
+ int iovcnt, int64_t pos);
+
+/*
+ * This function provides hooks around different
+ * stages of RAM migration.
+ */
+typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
+
+/*
+ * Constants used by ram_control_* hooks
+ */
+#define RAM_CONTROL_SETUP 0
+#define RAM_CONTROL_ROUND 1
+#define RAM_CONTROL_HOOK 2
+#define RAM_CONTROL_FINISH 3
+
+/*
+ * This function allows override of where the RAM page
+ * is saved (such as RDMA, for example.)
+ */
+typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque,
+ ram_addr_t block_offset,
+ ram_addr_t offset,
+ size_t size,
+ int *bytes_sent);
+
+typedef struct QEMUFileOps {
+ QEMUFilePutBufferFunc *put_buffer; /* write a chunk of data at a given position */
+ QEMUFileGetBufferFunc *get_buffer; /* read a chunk of data at a given position */
+ QEMUFileCloseFunc *close; /* close the file; negative error number on error */
+ QEMUFileGetFD *get_fd; /* return the OS file descriptor of the QEMUFile */
+ QEMUFileWritevBufferFunc *writev_buffer; /* write an iovec to the file */
+ QEMURamHookFunc *before_ram_iterate; /* hook around a RAM migration stage */
+ QEMURamHookFunc *after_ram_iterate; /* hook around a RAM migration stage */
+ QEMURamHookFunc *hook_ram_load; /* hook around a RAM migration stage */
+ QEMURamSaveFunc *save_page; /* override where a RAM page is saved (e.g. RDMA) */
+} QEMUFileOps;
+
+QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops);
+QEMUFile *qemu_fopen(const char *filename, const char *mode);
+QEMUFile *qemu_fdopen(int fd, const char *mode);
+QEMUFile *qemu_fopen_socket(int fd, const char *mode);
+QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
+int qemu_get_fd(QEMUFile *f);
+int qemu_fclose(QEMUFile *f);
+int64_t qemu_ftell(QEMUFile *f);
+void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
+void qemu_put_byte(QEMUFile *f, int v);
+/*
+ * put_buffer without copying the buffer.
+ * The buffer should be available till it is sent asynchronously.
+ */
+void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size);
+bool qemu_file_mode_is_not_valid(const char *mode);
+
+static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v)
+{
+ qemu_put_byte(f, (int)v);
+}
+
+#define qemu_put_sbyte qemu_put_byte
+
+void qemu_put_be16(QEMUFile *f, unsigned int v);
+void qemu_put_be32(QEMUFile *f, unsigned int v);
+void qemu_put_be64(QEMUFile *f, uint64_t v);
+int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size);
+int qemu_get_byte(QEMUFile *f);
+void qemu_update_position(QEMUFile *f, size_t size);
+
+static inline unsigned int qemu_get_ubyte(QEMUFile *f)
+{
+ return (unsigned int)qemu_get_byte(f);
+}
+
+#define qemu_get_sbyte qemu_get_byte
+
+unsigned int qemu_get_be16(QEMUFile *f);
+unsigned int qemu_get_be32(QEMUFile *f);
+uint64_t qemu_get_be64(QEMUFile *f);
+
+int qemu_file_rate_limit(QEMUFile *f);
+void qemu_file_reset_rate_limit(QEMUFile *f);
+void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
+int64_t qemu_file_get_rate_limit(QEMUFile *f);
+int qemu_file_get_error(QEMUFile *f);
+void qemu_fflush(QEMUFile *f);
+
+static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
+{
+ qemu_put_be64(f, *pv);
+}
+
+static inline void qemu_put_be32s(QEMUFile *f, const uint32_t *pv)
+{
+ qemu_put_be32(f, *pv);
+}
+
+static inline void qemu_put_be16s(QEMUFile *f, const uint16_t *pv)
+{
+ qemu_put_be16(f, *pv);
+}
+
+static inline void qemu_put_8s(QEMUFile *f, const uint8_t *pv)
+{
+ qemu_put_byte(f, *pv);
+}
+
+static inline void qemu_get_be64s(QEMUFile *f, uint64_t *pv)
+{
+ *pv = qemu_get_be64(f);
+}
+
+static inline void qemu_get_be32s(QEMUFile *f, uint32_t *pv)
+{
+ *pv = qemu_get_be32(f);
+}
+
+static inline void qemu_get_be16s(QEMUFile *f, uint16_t *pv)
+{
+ *pv = qemu_get_be16(f);
+}
+
+static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv)
+{
+ *pv = qemu_get_byte(f);
+}
+
+// Signed versions for type safety
+static inline void qemu_put_sbuffer(QEMUFile *f, const int8_t *buf, int size)
+{
+ qemu_put_buffer(f, (const uint8_t *)buf, size);
+}
+
+static inline void qemu_put_sbe16(QEMUFile *f, int v)
+{
+ qemu_put_be16(f, (unsigned int)v);
+}
+
+static inline void qemu_put_sbe32(QEMUFile *f, int v)
+{
+ qemu_put_be32(f, (unsigned int)v);
+}
+
+static inline void qemu_put_sbe64(QEMUFile *f, int64_t v)
+{
+ qemu_put_be64(f, (uint64_t)v);
+}
+
+static inline size_t qemu_get_sbuffer(QEMUFile *f, int8_t *buf, int size)
+{ /* Signed-buffer wrapper: forwards to qemu_get_buffer() with buf cast to uint8_t *. */
+ return qemu_get_buffer(f, (uint8_t *)buf, size); /* NOTE(review): the int result is converted to size_t here */
+}
+
+static inline int qemu_get_sbe16(QEMUFile *f)
+{
+ return (int)qemu_get_be16(f);
+}
+
+static inline int qemu_get_sbe32(QEMUFile *f)
+{
+ return (int)qemu_get_be32(f);
+}
+
+static inline int64_t qemu_get_sbe64(QEMUFile *f)
+{
+ return (int64_t)qemu_get_be64(f);
+}
+
+static inline void qemu_put_s8s(QEMUFile *f, const int8_t *pv)
+{
+ qemu_put_8s(f, (const uint8_t *)pv);
+}
+
+static inline void qemu_put_sbe16s(QEMUFile *f, const int16_t *pv)
+{
+ qemu_put_be16s(f, (const uint16_t *)pv);
+}
+
+static inline void qemu_put_sbe32s(QEMUFile *f, const int32_t *pv)
+{
+ qemu_put_be32s(f, (const uint32_t *)pv);
+}
+
+static inline void qemu_put_sbe64s(QEMUFile *f, const int64_t *pv)
+{
+ qemu_put_be64s(f, (const uint64_t *)pv);
+}
+
+static inline void qemu_get_s8s(QEMUFile *f, int8_t *pv)
+{
+ qemu_get_8s(f, (uint8_t *)pv);
+}
+
+static inline void qemu_get_sbe16s(QEMUFile *f, int16_t *pv)
+{
+ qemu_get_be16s(f, (uint16_t *)pv);
+}
+
+static inline void qemu_get_sbe32s(QEMUFile *f, int32_t *pv)
+{
+ qemu_get_be32s(f, (uint32_t *)pv);
+}
+
+static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
+{
+ qemu_get_be64s(f, (uint64_t *)pv);
+}
+#endif
diff --git a/contrib/qemu/include/migration/vmstate.h b/contrib/qemu/include/migration/vmstate.h
new file mode 100644
index 000000000..1c31b5d6f
--- /dev/null
+++ b/contrib/qemu/include/migration/vmstate.h
@@ -0,0 +1,740 @@
+/*
+ * QEMU migration/snapshot declarations
+ *
+ * Copyright (c) 2009-2011 Red Hat, Inc.
+ *
+ * Original author: Juan Quintela <quintela@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef QEMU_VMSTATE_H
+#define QEMU_VMSTATE_H 1
+
+#ifndef CONFIG_USER_ONLY
+#include <migration/qemu-file.h>
+#endif
+
+typedef void SaveStateHandler(QEMUFile *f, void *opaque);
+typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id);
+
+typedef struct SaveVMHandlers {
+ /* This runs inside the iothread lock. */
+ void (*set_params)(const MigrationParams *params, void * opaque);
+ SaveStateHandler *save_state;
+
+ void (*cancel)(void *opaque);
+ int (*save_live_complete)(QEMUFile *f, void *opaque);
+
+ /* This runs both outside and inside the iothread lock. */
+ bool (*is_active)(void *opaque);
+
+ /* This runs outside the iothread lock in the migration case, and
+ * within the lock in the savevm case. The callback had better only
+ * use data that is local to the migration thread or protected
+ * by other locks.
+ */
+ int (*save_live_iterate)(QEMUFile *f, void *opaque);
+
+ /* This runs outside the iothread lock! */
+ int (*save_live_setup)(QEMUFile *f, void *opaque);
+ uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size);
+
+ LoadStateHandler *load_state;
+} SaveVMHandlers;
+
+int register_savevm(DeviceState *dev,
+ const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveStateHandler *save_state,
+ LoadStateHandler *load_state,
+ void *opaque);
+
+int register_savevm_live(DeviceState *dev,
+ const char *idstr,
+ int instance_id,
+ int version_id,
+ SaveVMHandlers *ops,
+ void *opaque);
+
+void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque);
+void register_device_unmigratable(DeviceState *dev, const char *idstr,
+ void *opaque);
+
+
+typedef struct VMStateInfo VMStateInfo;
+typedef struct VMStateDescription VMStateDescription;
+
+struct VMStateInfo {
+ const char *name;
+ int (*get)(QEMUFile *f, void *pv, size_t size);
+ void (*put)(QEMUFile *f, void *pv, size_t size);
+};
+
+enum VMStateFlags {
+ VMS_SINGLE = 0x001, /* set by VMSTATE_SINGLE_TEST and VMSTATE_POINTER[_TEST] */
+ VMS_POINTER = 0x002, /* combined with other flags by several field macros below */
+ VMS_ARRAY = 0x004, /* set by the VMSTATE_*ARRAY macros that fill in .num */
+ VMS_STRUCT = 0x008, /* set by the VMSTATE_STRUCT* macros that fill in .vmsd */
+ VMS_VARRAY_INT32 = 0x010, /* Array with size in int32_t field */
+ VMS_BUFFER = 0x020, /* static sized buffer */
+ VMS_ARRAY_OF_POINTER = 0x040, /* set together with VMS_ARRAY by VMSTATE_ARRAY_OF_POINTER */
+ VMS_VARRAY_UINT16 = 0x080, /* Array with size in uint16_t field */
+ VMS_VBUFFER = 0x100, /* Buffer with size in int32_t field */
+ VMS_MULTIPLY = 0x200, /* multiply "size" field by field_size */
+ VMS_VARRAY_UINT8 = 0x400, /* Array with size in uint8_t field */
+ VMS_VARRAY_UINT32 = 0x800, /* Array with size in uint32_t field */
+};
+
+typedef struct {
+ const char *name;
+ size_t offset;
+ size_t size;
+ size_t start;
+ int num;
+ size_t num_offset;
+ size_t size_offset;
+ const VMStateInfo *info;
+ enum VMStateFlags flags;
+ const VMStateDescription *vmsd;
+ int version_id;
+ bool (*field_exists)(void *opaque, int version_id);
+} VMStateField;
+
+typedef struct VMStateSubsection {
+ const VMStateDescription *vmsd;
+ bool (*needed)(void *opaque);
+} VMStateSubsection;
+
+struct VMStateDescription {
+ const char *name;
+ int unmigratable;
+ int version_id;
+ int minimum_version_id;
+ int minimum_version_id_old;
+ LoadStateHandler *load_state_old;
+ int (*pre_load)(void *opaque);
+ int (*post_load)(void *opaque, int version_id);
+ void (*pre_save)(void *opaque);
+ VMStateField *fields;
+ const VMStateSubsection *subsections;
+};
+
+#ifdef CONFIG_USER_ONLY
+extern const VMStateDescription vmstate_dummy;
+#endif
+
+extern const VMStateInfo vmstate_info_bool;
+
+extern const VMStateInfo vmstate_info_int8;
+extern const VMStateInfo vmstate_info_int16;
+extern const VMStateInfo vmstate_info_int32;
+extern const VMStateInfo vmstate_info_int64;
+
+extern const VMStateInfo vmstate_info_uint8_equal;
+extern const VMStateInfo vmstate_info_uint16_equal;
+extern const VMStateInfo vmstate_info_int32_equal;
+extern const VMStateInfo vmstate_info_uint32_equal;
+extern const VMStateInfo vmstate_info_uint64_equal;
+extern const VMStateInfo vmstate_info_int32_le;
+
+extern const VMStateInfo vmstate_info_uint8;
+extern const VMStateInfo vmstate_info_uint16;
+extern const VMStateInfo vmstate_info_uint32;
+extern const VMStateInfo vmstate_info_uint64;
+
+extern const VMStateInfo vmstate_info_float64;
+
+extern const VMStateInfo vmstate_info_timer;
+extern const VMStateInfo vmstate_info_buffer;
+extern const VMStateInfo vmstate_info_unused_buffer;
+extern const VMStateInfo vmstate_info_bitmap;
+
+#define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
+#define type_check_array(t1,t2,n) ((t1(*)[n])0 - (t2*)0)
+#define type_check_pointer(t1,t2) ((t1**)0 - (t2*)0)
+
+#define vmstate_offset_value(_state, _field, _type) \
+ (offsetof(_state, _field) + \
+ type_check(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_pointer(_state, _field, _type) \
+ (offsetof(_state, _field) + \
+ type_check_pointer(_type, typeof_field(_state, _field)))
+
+#define vmstate_offset_array(_state, _field, _type, _num) \
+ (offsetof(_state, _field) + \
+ type_check_array(_type, typeof_field(_state, _field), _num))
+
+#define vmstate_offset_2darray(_state, _field, _type, _n1, _n2) \
+ (offsetof(_state, _field) + \
+ type_check_2darray(_type, typeof_field(_state, _field), _n1, _n2))
+
+#define vmstate_offset_sub_array(_state, _field, _type, _start) \
+ (offsetof(_state, _field[_start])) /* offset of element _start; _type is accepted but not used */
+
+#define vmstate_offset_buffer(_state, _field) \
+ vmstate_offset_array(_state, _field, uint8_t, \
+ sizeof(typeof_field(_state, _field)))
+
+#define VMSTATE_SINGLE_TEST(_field, _state, _test, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size = sizeof(_type), \
+ .info = &(_info), \
+ .flags = VMS_SINGLE, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_POINTER(_field, _state, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_SINGLE|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_POINTER_TEST(_field, _state, _test, _info, _type) { \
+ .name = (stringify(_field)), \
+ .info = &(_info), \
+ .field_exists = (_test), \
+ .size = sizeof(_type), \
+ .flags = VMS_SINGLE|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_ARRAY(_field, _state, _num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_2DARRAY(_field, _state, _n1, _n2, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_n1) * (_n2), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_2darray(_state, _field, _type, _n1, _n2), \
+}
+
+#define VMSTATE_ARRAY_TEST(_field, _state, _num, _test, _info, _type) {\
+ .name = (stringify(_field)), \
+ .field_exists = (_test), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_SUB_ARRAY(_field, _state, _start, _num, _version, _info, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY, \
+ .offset = vmstate_offset_sub_array(_state, _field, _type, _start), \
+}
+
+#define VMSTATE_ARRAY_INT32_UNSAFE(_field, _state, _field_num, _info, _type) {\
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_INT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_VARRAY_INT32(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_INT32|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_VARRAY_UINT32(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_UINT32|VMS_POINTER, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_VARRAY_UINT16, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_TEST(_field, _state, _test, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_POINTER_TEST(_field, _state, _test, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .field_exists = (_test), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_POINTER, \
+ .offset = vmstate_offset_value(_state, _field, _type), \
+}
+
+#define VMSTATE_ARRAY_OF_POINTER(_field, _state, _num, _version, _info, _type) {\
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .num = (_num), \
+ .info = &(_info), \
+ .size = sizeof(_type), \
+ .flags = VMS_ARRAY|VMS_ARRAY_OF_POINTER, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num), \
+}
+
+#define VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, _test, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num = (_num), \
+ .field_exists = (_test), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_ARRAY, \
+ .offset = vmstate_offset_array(_state, _field, _type, _num),\
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT8(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_UINT8, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_INT32(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_INT32 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT32(_field, _state, _field_num, _vmsd, _type) { /* struct array behind a pointer, count in a uint32_t field */ \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_UINT32 | VMS_STRUCT, /* must match the uint32_t type check on _field_num */ \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_POINTER_UINT16(_field, _state, _field_num, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .version_id = 0, \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\
+ .size = sizeof(_type), \
+ .vmsd = &(_vmsd), \
+ .flags = VMS_POINTER | VMS_VARRAY_UINT16 | VMS_STRUCT, \
+ .offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_INT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, int32_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_INT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STRUCT_VARRAY_UINT32(_field, _state, _field_num, _version, _vmsd, _type) { \
+ .name = (stringify(_field)), \
+ .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \
+ .version_id = (_version), \
+ .vmsd = &(_vmsd), \
+ .size = sizeof(_type), \
+ .flags = VMS_STRUCT|VMS_VARRAY_UINT32, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_STATIC_BUFFER(_field, _state, _version, _test, _start, _size) { /* fixed-size byte buffer, optionally starting _start bytes into _field */ \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size = ((_size) - (_start)), /* arguments parenthesized so expression arguments keep their precedence */ \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_BUFFER, \
+ .offset = vmstate_offset_buffer(_state, _field) + (_start), \
+}
+
+#define VMSTATE_VBUFFER_MULTIPLY(_field, _state, _version, _test, _start, _field_size, _multiply) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\
+ .size = (_multiply), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER|VMS_MULTIPLY, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_VBUFFER(_field, _state, _version, _test, _start, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_VBUFFER_UINT32(_field, _state, _version, _test, _start, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .field_exists = (_test), \
+ .size_offset = vmstate_offset_value(_state, _field_size, uint32_t),\
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+ .start = (_start), \
+}
+
+#define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &(_info), \
+ .flags = VMS_BUFFER, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_BUFFER_POINTER_UNSAFE(_field, _state, _version, _size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &vmstate_info_buffer, \
+ .flags = VMS_BUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+}
+
+#define VMSTATE_UNUSED_BUFFER(_test, _version, _size) { \
+ .name = "unused", \
+ .field_exists = (_test), \
+ .version_id = (_version), \
+ .size = (_size), \
+ .info = &vmstate_info_unused_buffer, \
+ .flags = VMS_BUFFER, \
+}
+
+/* _field_size should be an int32_t field in the _state struct giving the
+ * size of the bitmap _field in bits.
+ */
+#define VMSTATE_BITMAP(_field, _state, _version, _field_size) { \
+ .name = (stringify(_field)), \
+ .version_id = (_version), \
+ .size_offset = vmstate_offset_value(_state, _field_size, int32_t),\
+ .info = &vmstate_info_bitmap, \
+ .flags = VMS_VBUFFER|VMS_POINTER, \
+ .offset = offsetof(_state, _field), \
+}
+
+/* _f : field name
+ _f_n : num of elements field_name
+ _n : num of elements
+ _s : struct state name
+ _v : version
+*/
+
+#define VMSTATE_SINGLE(_field, _state, _version, _info, _type) \
+ VMSTATE_SINGLE_TEST(_field, _state, NULL, _version, _info, _type)
+
+#define VMSTATE_STRUCT(_field, _state, _version, _vmsd, _type) \
+ VMSTATE_STRUCT_TEST(_field, _state, NULL, _version, _vmsd, _type)
+
+#define VMSTATE_STRUCT_POINTER(_field, _state, _vmsd, _type) \
+ VMSTATE_STRUCT_POINTER_TEST(_field, _state, NULL, _vmsd, _type)
+
+#define VMSTATE_STRUCT_ARRAY(_field, _state, _num, _version, _vmsd, _type) \
+ VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version, \
+ _vmsd, _type)
+
+#define VMSTATE_BOOL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_INT8_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int8, int8_t)
+#define VMSTATE_INT16_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int16, int16_t)
+#define VMSTATE_INT32_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int32, int32_t)
+#define VMSTATE_INT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_UINT8_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint8, uint8_t)
+#define VMSTATE_UINT16_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16, uint16_t)
+#define VMSTATE_UINT32_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32, uint32_t)
+#define VMSTATE_UINT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_BOOL(_f, _s) \
+ VMSTATE_BOOL_V(_f, _s, 0)
+
+#define VMSTATE_INT8(_f, _s) \
+ VMSTATE_INT8_V(_f, _s, 0)
+#define VMSTATE_INT16(_f, _s) \
+ VMSTATE_INT16_V(_f, _s, 0)
+#define VMSTATE_INT32(_f, _s) \
+ VMSTATE_INT32_V(_f, _s, 0)
+#define VMSTATE_INT64(_f, _s) \
+ VMSTATE_INT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8(_f, _s) \
+ VMSTATE_UINT8_V(_f, _s, 0)
+#define VMSTATE_UINT16(_f, _s) \
+ VMSTATE_UINT16_V(_f, _s, 0)
+#define VMSTATE_UINT32(_f, _s) \
+ VMSTATE_UINT32_V(_f, _s, 0)
+#define VMSTATE_UINT64(_f, _s) \
+ VMSTATE_UINT64_V(_f, _s, 0)
+
+#define VMSTATE_UINT8_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint8_equal, uint8_t)
+
+#define VMSTATE_UINT16_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_UINT16_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint16_equal, uint16_t)
+
+#define VMSTATE_INT32_EQUAL(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_equal, int32_t)
+
+#define VMSTATE_UINT32_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint32_equal, uint32_t)
+
+#define VMSTATE_UINT32_EQUAL(_f, _s) \
+ VMSTATE_UINT32_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_UINT64_EQUAL_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_uint64_equal, uint64_t)
+
+#define VMSTATE_UINT64_EQUAL(_f, _s) \
+ VMSTATE_UINT64_EQUAL_V(_f, _s, 0)
+
+#define VMSTATE_INT32_LE(_f, _s) \
+ VMSTATE_SINGLE(_f, _s, 0, vmstate_info_int32_le, int32_t)
+
+#define VMSTATE_UINT8_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT16_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT32_TEST(_f, _s, _t) \
+ VMSTATE_SINGLE_TEST(_f, _s, _t, 0, vmstate_info_uint32, uint32_t)
+
+
+#define VMSTATE_FLOAT64_V(_f, _s, _v) \
+ VMSTATE_SINGLE(_f, _s, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64(_f, _s) \
+ VMSTATE_FLOAT64_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_TEST(_f, _s, _test) \
+ VMSTATE_POINTER_TEST(_f, _s, _test, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER_V(_f, _s, _v) \
+ VMSTATE_POINTER(_f, _s, _v, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_TIMER(_f, _s) \
+ VMSTATE_TIMER_V(_f, _s, 0)
+
+#define VMSTATE_TIMER_ARRAY(_f, _s, _n) \
+ VMSTATE_ARRAY_OF_POINTER(_f, _s, _n, 0, vmstate_info_timer, QEMUTimer *)
+
+#define VMSTATE_BOOL_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_bool, bool)
+
+#define VMSTATE_BOOL_ARRAY(_f, _s, _n) \
+ VMSTATE_BOOL_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, _v) \
+ VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint16, uint16_t)
+
+#define VMSTATE_UINT16_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT16_2DARRAY(_f, _s, _n1, _n2) \
+ VMSTATE_UINT16_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, _v) \
+ VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint8, uint8_t)
+
+#define VMSTATE_UINT8_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT8_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT8_2DARRAY(_f, _s, _n1, _n2) \
+ VMSTATE_UINT8_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
+#define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_UINT32_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t)
+
+#define VMSTATE_UINT64_ARRAY(_f, _s, _n) \
+ VMSTATE_UINT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT16_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int16, int16_t)
+
+#define VMSTATE_INT16_ARRAY(_f, _s, _n) \
+ VMSTATE_INT16_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT32_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int32, int32_t)
+
+#define VMSTATE_INT32_ARRAY(_f, _s, _n) \
+ VMSTATE_INT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num) \
+ VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t)
+
+#define VMSTATE_UINT32_ARRAY(_f, _s, _n) /* NOTE(review): repeats the identical definition made earlier in this header */ \
+ VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_INT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int64, int64_t)
+
+#define VMSTATE_INT64_ARRAY(_f, _s, _n) \
+ VMSTATE_INT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, _v) \
+ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_float64, float64)
+
+#define VMSTATE_FLOAT64_ARRAY(_f, _s, _n) \
+ VMSTATE_FLOAT64_ARRAY_V(_f, _s, _n, 0)
+
+#define VMSTATE_BUFFER_V(_f, _s, _v) \
+ VMSTATE_STATIC_BUFFER(_f, _s, _v, NULL, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER(_f, _s) \
+ VMSTATE_BUFFER_V(_f, _s, 0)
+
+#define VMSTATE_PARTIAL_BUFFER(_f, _s, _size) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_BUFFER_START_MIDDLE(_f, _s, _start) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, NULL, _start, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_PARTIAL_VBUFFER(_f, _s, _size) \
+ VMSTATE_VBUFFER(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_PARTIAL_VBUFFER_UINT32(_f, _s, _size) \
+ VMSTATE_VBUFFER_UINT32(_f, _s, 0, NULL, 0, _size)
+
+#define VMSTATE_SUB_VBUFFER(_f, _s, _start, _size) \
+ VMSTATE_VBUFFER(_f, _s, 0, NULL, _start, _size)
+
+#define VMSTATE_BUFFER_TEST(_f, _s, _test) \
+ VMSTATE_STATIC_BUFFER(_f, _s, 0, _test, 0, sizeof(typeof_field(_s, _f)))
+
+#define VMSTATE_BUFFER_UNSAFE(_field, _state, _version, _size) \
+ VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, vmstate_info_buffer, _size)
+
+#define VMSTATE_UNUSED_V(_v, _size) \
+ VMSTATE_UNUSED_BUFFER(NULL, _v, _size)
+
+#define VMSTATE_UNUSED(_size) \
+ VMSTATE_UNUSED_V(0, _size)
+
+#define VMSTATE_UNUSED_TEST(_test, _size) \
+ VMSTATE_UNUSED_BUFFER(_test, 0, _size)
+
+#define VMSTATE_END_OF_LIST() \
+ {}
+
+int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
+ void *opaque, int version_id);
+void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
+ void *opaque);
+
+int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
+ const VMStateDescription *vmsd,
+ void *base, int alias_id,
+ int required_for_version);
+
+static inline int vmstate_register(DeviceState *dev, int instance_id,
+ const VMStateDescription *vmsd,
+ void *opaque)
+{
+ return vmstate_register_with_alias_id(dev, instance_id, vmsd,
+ opaque, -1, 0);
+}
+
+void vmstate_unregister(DeviceState *dev, const VMStateDescription *vmsd,
+ void *opaque);
+
+struct MemoryRegion;
+void vmstate_register_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_unregister_ram(struct MemoryRegion *memory, DeviceState *dev);
+void vmstate_register_ram_global(struct MemoryRegion *memory);
+
+#endif
diff --git a/contrib/qemu/include/monitor/monitor.h b/contrib/qemu/include/monitor/monitor.h
new file mode 100644
index 000000000..1942cc42f
--- /dev/null
+++ b/contrib/qemu/include/monitor/monitor.h
@@ -0,0 +1,104 @@
+#ifndef MONITOR_H
+#define MONITOR_H
+
+#include "qemu-common.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "block/block.h"
+#include "monitor/readline.h"
+
+extern Monitor *cur_mon;
+extern Monitor *default_mon;
+
+/* flags for monitor_init */
+#define MONITOR_IS_DEFAULT 0x01
+#define MONITOR_USE_READLINE 0x02
+#define MONITOR_USE_CONTROL 0x04
+#define MONITOR_USE_PRETTY 0x08
+
+/* flags for monitor commands */
+#define MONITOR_CMD_ASYNC 0x0001
+
+/* QMP events */
+typedef enum MonitorEvent {
+ QEVENT_SHUTDOWN,
+ QEVENT_RESET,
+ QEVENT_POWERDOWN,
+ QEVENT_STOP,
+ QEVENT_RESUME,
+ QEVENT_VNC_CONNECTED,
+ QEVENT_VNC_INITIALIZED,
+ QEVENT_VNC_DISCONNECTED,
+ QEVENT_BLOCK_IO_ERROR,
+ QEVENT_RTC_CHANGE,
+ QEVENT_WATCHDOG,
+ QEVENT_SPICE_CONNECTED,
+ QEVENT_SPICE_INITIALIZED,
+ QEVENT_SPICE_DISCONNECTED,
+ QEVENT_BLOCK_JOB_COMPLETED,
+ QEVENT_BLOCK_JOB_CANCELLED,
+ QEVENT_BLOCK_JOB_ERROR,
+ QEVENT_BLOCK_JOB_READY,
+ QEVENT_DEVICE_DELETED,
+ QEVENT_DEVICE_TRAY_MOVED,
+ QEVENT_NIC_RX_FILTER_CHANGED,
+ QEVENT_SUSPEND,
+ QEVENT_SUSPEND_DISK,
+ QEVENT_WAKEUP,
+ QEVENT_BALLOON_CHANGE,
+ QEVENT_SPICE_MIGRATE_COMPLETED,
+ QEVENT_GUEST_PANICKED,
+
+ /* Add to 'monitor_event_names' array in monitor.c when
+ * defining new events here */
+
+ QEVENT_MAX, /* final enumerator, so its value equals the number of events listed above */
+} MonitorEvent;
+
+int monitor_cur_is_qmp(void);
+
+void monitor_protocol_event(MonitorEvent event, QObject *data);
+void monitor_init(CharDriverState *chr, int flags);
+
+int monitor_suspend(Monitor *mon);
+void monitor_resume(Monitor *mon);
+
+int monitor_read_bdrv_key_start(Monitor *mon, BlockDriverState *bs,
+ BlockDriverCompletionFunc *completion_cb,
+ void *opaque);
+int monitor_read_block_device_key(Monitor *mon, const char *device,
+ BlockDriverCompletionFunc *completion_cb,
+ void *opaque);
+
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp);
+int monitor_handle_fd_param(Monitor *mon, const char *fdname);
+
+void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
+ GCC_FMT_ATTR(2, 0);
+void monitor_printf(Monitor *mon, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+void monitor_print_filename(Monitor *mon, const char *filename);
+void monitor_flush(Monitor *mon);
+int monitor_set_cpu(int cpu_index);
+int monitor_get_cpu_index(void);
+
+typedef void (MonitorCompletion)(void *opaque, QObject *ret_data);
+
+void monitor_set_error(Monitor *mon, QError *qerror);
+void monitor_read_command(Monitor *mon, int show_prompt);
+ReadLineState *monitor_get_rs(Monitor *mon);
+int monitor_read_password(Monitor *mon, ReadLineFunc *readline_func,
+ void *opaque);
+
+int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
+
+int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
+
+AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
+ bool has_opaque, const char *opaque,
+ Error **errp);
+int monitor_fdset_get_fd(int64_t fdset_id, int flags);
+int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd);
+int monitor_fdset_dup_fd_remove(int dup_fd);
+int monitor_fdset_dup_fd_find(int dup_fd);
+
+#endif /* !MONITOR_H */
diff --git a/contrib/qemu/include/monitor/readline.h b/contrib/qemu/include/monitor/readline.h
new file mode 100644
index 000000000..fc9806ecf
--- /dev/null
+++ b/contrib/qemu/include/monitor/readline.h
@@ -0,0 +1,55 @@
+#ifndef READLINE_H
+#define READLINE_H
+
+#include "qemu-common.h"
+
+#define READLINE_CMD_BUF_SIZE 4095
+#define READLINE_MAX_CMDS 64
+#define READLINE_MAX_COMPLETIONS 256
+
+typedef void ReadLineFunc(Monitor *mon, const char *str, void *opaque);
+typedef void ReadLineCompletionFunc(const char *cmdline);
+
+typedef struct ReadLineState {
+ char cmd_buf[READLINE_CMD_BUF_SIZE + 1];
+ int cmd_buf_index;
+ int cmd_buf_size;
+
+ char last_cmd_buf[READLINE_CMD_BUF_SIZE + 1];
+ int last_cmd_buf_index;
+ int last_cmd_buf_size;
+
+ int esc_state;
+ int esc_param;
+
+ char *history[READLINE_MAX_CMDS];
+ int hist_entry;
+
+ ReadLineCompletionFunc *completion_finder;
+ char *completions[READLINE_MAX_COMPLETIONS];
+ int nb_completions;
+ int completion_index;
+
+ ReadLineFunc *readline_func;
+ void *readline_opaque;
+ int read_password;
+ char prompt[256];
+ Monitor *mon;
+} ReadLineState;
+
+void readline_add_completion(ReadLineState *rs, const char *str);
+void readline_set_completion_index(ReadLineState *rs, int completion_index);
+
+const char *readline_get_history(ReadLineState *rs, unsigned int index);
+
+void readline_handle_byte(ReadLineState *rs, int ch);
+
+void readline_start(ReadLineState *rs, const char *prompt, int read_password,
+ ReadLineFunc *readline_func, void *opaque);
+void readline_restart(ReadLineState *rs);
+void readline_show_prompt(ReadLineState *rs);
+
+ReadLineState *readline_init(Monitor *mon,
+ ReadLineCompletionFunc *completion_finder);
+
+#endif /* !READLINE_H */
diff --git a/contrib/qemu/include/qapi/error.h b/contrib/qemu/include/qapi/error.h
new file mode 100644
index 000000000..ffd1cea47
--- /dev/null
+++ b/contrib/qemu/include/qapi/error.h
@@ -0,0 +1,85 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2. See
+ * the COPYING.LIB file in the top-level directory.
+ */
+#ifndef ERROR_H
+#define ERROR_H
+
+#include "qemu/compiler.h"
+#include "qapi-types.h"
+#include <stdbool.h>
+
+/**
+ * A class representing internal errors within QEMU. An error has an ErrorClass
+ * code and a human-readable message.
+ */
+typedef struct Error Error;
+
+/**
+ * Set an indirect pointer to an error given an ErrorClass value and a
+ * printf-style human message. This function is not meant to be used outside
+ * of QEMU.
+ */
+void error_set(Error **err, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(3, 4);
+
+/**
+ * Set an indirect pointer to an error given an ErrorClass value and a
+ * printf-style human message, followed by a strerror() string if
+ * @os_error is not zero.
+ */
+void error_set_errno(Error **err, int os_error, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+
+/**
+ * Same as error_set(), but sets a generic error
+ */
+#define error_setg(err, fmt, ...) \
+ error_set(err, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+#define error_setg_errno(err, os_error, fmt, ...) \
+ error_set_errno(err, os_error, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+
+/**
+ * Helper for open() errors
+ */
+void error_setg_file_open(Error **errp, int os_errno, const char *filename);
+
+/**
+ * Returns true if an indirect pointer to an error is pointing to a valid
+ * error object.
+ */
+bool error_is_set(Error **err);
+
+/**
+ * Get the error class of an error object.
+ */
+ErrorClass error_get_class(const Error *err);
+
+/**
+ * Returns an exact copy of the error passed as an argument.
+ */
+Error *error_copy(const Error *err);
+
+/**
+ * Get a human readable representation of an error object.
+ */
+const char *error_get_pretty(Error *err);
+
+/**
+ * Propagate an error to an indirect pointer to an error. This function will
+ * always transfer ownership of the error reference and handles the case where
+ * dst_err is NULL correctly. Errors after the first are discarded.
+ */
+void error_propagate(Error **dst_err, Error *local_err);
+
+/**
+ * Free an error object.
+ */
+void error_free(Error *err);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/json-lexer.h b/contrib/qemu/include/qapi/qmp/json-lexer.h
new file mode 100644
index 000000000..cdff0460a
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/json-lexer.h
@@ -0,0 +1,51 @@
+/*
+ * JSON lexer
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_JSON_LEXER_H
+#define QEMU_JSON_LEXER_H
+
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qlist.h"
+
+typedef enum json_token_type {
+ JSON_OPERATOR = 100,
+ JSON_INTEGER,
+ JSON_FLOAT,
+ JSON_KEYWORD,
+ JSON_STRING,
+ JSON_ESCAPE,
+ JSON_SKIP,
+ JSON_ERROR,
+} JSONTokenType;
+
+typedef struct JSONLexer JSONLexer;
+
+typedef void (JSONLexerEmitter)(JSONLexer *, QString *, JSONTokenType, int x, int y);
+
+struct JSONLexer
+{
+ JSONLexerEmitter *emit;
+ int state;
+ QString *token;
+ int x, y;
+};
+
+void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func);
+
+int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size);
+
+int json_lexer_flush(JSONLexer *lexer);
+
+void json_lexer_destroy(JSONLexer *lexer);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/json-parser.h b/contrib/qemu/include/qapi/qmp/json-parser.h
new file mode 100644
index 000000000..44d88f346
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/json-parser.h
@@ -0,0 +1,24 @@
+/*
+ * JSON Parser
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_JSON_PARSER_H
+#define QEMU_JSON_PARSER_H
+
+#include "qemu-common.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/error.h"
+
+QObject *json_parser_parse(QList *tokens, va_list *ap);
+QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/json-streamer.h b/contrib/qemu/include/qapi/qmp/json-streamer.h
new file mode 100644
index 000000000..823f7d7fa
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/json-streamer.h
@@ -0,0 +1,40 @@
+/*
+ * JSON streaming support
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_JSON_STREAMER_H
+#define QEMU_JSON_STREAMER_H
+
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/json-lexer.h"
+
+typedef struct JSONMessageParser
+{
+ void (*emit)(struct JSONMessageParser *parser, QList *tokens);
+ JSONLexer lexer;
+ int brace_count;
+ int bracket_count;
+ QList *tokens;
+ uint64_t token_size;
+} JSONMessageParser;
+
+void json_message_parser_init(JSONMessageParser *parser,
+ void (*func)(JSONMessageParser *, QList *));
+
+int json_message_parser_feed(JSONMessageParser *parser,
+ const char *buffer, size_t size);
+
+int json_message_parser_flush(JSONMessageParser *parser);
+
+void json_message_parser_destroy(JSONMessageParser *parser);
+
+#endif
diff --git a/contrib/qemu/include/qapi/qmp/qbool.h b/contrib/qemu/include/qapi/qmp/qbool.h
new file mode 100644
index 000000000..c4eaab9bb
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qbool.h
@@ -0,0 +1,29 @@
+/*
+ * QBool Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QBOOL_H
+#define QBOOL_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QBool {
+ QObject_HEAD;
+ int value;
+} QBool;
+
+QBool *qbool_from_int(int value);
+int qbool_get_int(const QBool *qb);
+QBool *qobject_to_qbool(const QObject *obj);
+
+#endif /* QBOOL_H */
diff --git a/contrib/qemu/include/qapi/qmp/qdict.h b/contrib/qemu/include/qapi/qmp/qdict.h
new file mode 100644
index 000000000..685b2e3fc
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qdict.h
@@ -0,0 +1,69 @@
+/*
+ * QDict Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QDICT_H
+#define QDICT_H
+
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qlist.h"
+#include "qemu/queue.h"
+#include <stdint.h>
+
+#define QDICT_BUCKET_MAX 512
+
+typedef struct QDictEntry {
+ char *key;
+ QObject *value;
+ QLIST_ENTRY(QDictEntry) next;
+} QDictEntry;
+
+typedef struct QDict {
+ QObject_HEAD;
+ size_t size;
+ QLIST_HEAD(,QDictEntry) table[QDICT_BUCKET_MAX];
+} QDict;
+
+/* Object API */
+QDict *qdict_new(void);
+const char *qdict_entry_key(const QDictEntry *entry);
+QObject *qdict_entry_value(const QDictEntry *entry);
+size_t qdict_size(const QDict *qdict);
+void qdict_put_obj(QDict *qdict, const char *key, QObject *value);
+void qdict_del(QDict *qdict, const char *key);
+int qdict_haskey(const QDict *qdict, const char *key);
+QObject *qdict_get(const QDict *qdict, const char *key);
+QDict *qobject_to_qdict(const QObject *obj);
+void qdict_iter(const QDict *qdict,
+ void (*iter)(const char *key, QObject *obj, void *opaque),
+ void *opaque);
+const QDictEntry *qdict_first(const QDict *qdict);
+const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry);
+
+/* Helper to qdict_put_obj(), accepts any object */
+#define qdict_put(qdict, key, obj) \
+ qdict_put_obj(qdict, key, QOBJECT(obj))
+
+/* High level helpers */
+double qdict_get_double(const QDict *qdict, const char *key);
+int64_t qdict_get_int(const QDict *qdict, const char *key);
+int qdict_get_bool(const QDict *qdict, const char *key);
+QList *qdict_get_qlist(const QDict *qdict, const char *key);
+QDict *qdict_get_qdict(const QDict *qdict, const char *key);
+const char *qdict_get_str(const QDict *qdict, const char *key);
+int64_t qdict_get_try_int(const QDict *qdict, const char *key,
+ int64_t def_value);
+int qdict_get_try_bool(const QDict *qdict, const char *key, int def_value);
+const char *qdict_get_try_str(const QDict *qdict, const char *key);
+
+QDict *qdict_clone_shallow(const QDict *src);
+
+#endif /* QDICT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qerror.h b/contrib/qemu/include/qapi/qmp/qerror.h
new file mode 100644
index 000000000..c30c2f6d7
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qerror.h
@@ -0,0 +1,249 @@
+/*
+ * QError Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+#ifndef QERROR_H
+#define QERROR_H
+
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi-types.h"
+#include <stdarg.h>
+
+typedef struct QError {
+ QObject_HEAD;
+ Location loc;
+ char *err_msg;
+ ErrorClass err_class;
+} QError;
+
+QString *qerror_human(const QError *qerror);
+void qerror_report(ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(2, 3);
+void qerror_report_err(Error *err);
+void assert_no_error(Error *err);
+
+/*
+ * QError class list
+ * Please keep the definitions in alphabetical order.
+ * Use scripts/check-qerror.sh to check.
+ */
+#define QERR_ADD_CLIENT_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Could not add client"
+
+#define QERR_AMBIGUOUS_PATH \
+ ERROR_CLASS_GENERIC_ERROR, "Path '%s' does not uniquely identify an object"
+
+#define QERR_BAD_BUS_FOR_DEVICE \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' can't go on a %s bus"
+
+#define QERR_BASE_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
+
+#define QERR_BLOCK_JOB_NOT_ACTIVE \
+ ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
+
+#define QERR_BLOCK_JOB_PAUSED \
+ ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused"
+
+#define QERR_BLOCK_JOB_NOT_READY \
+ ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed"
+
+#define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
+
+#define QERR_BUFFER_OVERRUN \
+ ERROR_CLASS_GENERIC_ERROR, "An internal buffer overran"
+
+#define QERR_BUS_NO_HOTPLUG \
+ ERROR_CLASS_GENERIC_ERROR, "Bus '%s' does not support hotplugging"
+
+#define QERR_BUS_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Bus '%s' not found"
+
+#define QERR_COMMAND_DISABLED \
+ ERROR_CLASS_GENERIC_ERROR, "The command %s has been disabled for this instance"
+
+#define QERR_COMMAND_NOT_FOUND \
+ ERROR_CLASS_COMMAND_NOT_FOUND, "The command %s has not been found"
+
+#define QERR_DEVICE_ENCRYPTED \
+ ERROR_CLASS_DEVICE_ENCRYPTED, "'%s' (%s) is encrypted"
+
+#define QERR_DEVICE_FEATURE_BLOCKS_MIGRATION \
+ ERROR_CLASS_GENERIC_ERROR, "Migration is disabled when using feature '%s' in device '%s'"
+
+#define QERR_DEVICE_HAS_NO_MEDIUM \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' has no medium"
+
+#define QERR_DEVICE_INIT_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' could not be initialized"
+
+#define QERR_DEVICE_IN_USE \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is in use"
+
+#define QERR_DEVICE_IS_READ_ONLY \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is read only"
+
+#define QERR_DEVICE_LOCKED \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is locked"
+
+#define QERR_DEVICE_MULTIPLE_BUSSES \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' has multiple child busses"
+
+#define QERR_DEVICE_NO_BUS \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' has no child bus"
+
+#define QERR_DEVICE_NO_HOTPLUG \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' does not support hotplugging"
+
+#define QERR_DEVICE_NOT_ACTIVE \
+ ERROR_CLASS_DEVICE_NOT_ACTIVE, "Device '%s' has not been activated"
+
+#define QERR_DEVICE_NOT_ENCRYPTED \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is not encrypted"
+
+#define QERR_DEVICE_NOT_FOUND \
+ ERROR_CLASS_DEVICE_NOT_FOUND, "Device '%s' not found"
+
+#define QERR_DEVICE_NOT_REMOVABLE \
+ ERROR_CLASS_GENERIC_ERROR, "Device '%s' is not removable"
+
+#define QERR_DUPLICATE_ID \
+ ERROR_CLASS_GENERIC_ERROR, "Duplicate ID '%s' for %s"
+
+#define QERR_FD_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "File descriptor named '%s' not found"
+
+#define QERR_FD_NOT_SUPPLIED \
+ ERROR_CLASS_GENERIC_ERROR, "No file descriptor supplied via SCM_RIGHTS"
+
+#define QERR_FEATURE_DISABLED \
+ ERROR_CLASS_GENERIC_ERROR, "The feature '%s' is not enabled"
+
+#define QERR_INVALID_BLOCK_FORMAT \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid block format '%s'"
+
+#define QERR_INVALID_OPTION_GROUP \
+ ERROR_CLASS_GENERIC_ERROR, "There is no option group '%s'"
+
+#define QERR_INVALID_PARAMETER \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid parameter '%s'"
+
+#define QERR_INVALID_PARAMETER_COMBINATION \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid parameter combination"
+
+#define QERR_INVALID_PARAMETER_TYPE \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid parameter type for '%s', expected: %s"
+
+#define QERR_INVALID_PARAMETER_VALUE \
+ ERROR_CLASS_GENERIC_ERROR, "Parameter '%s' expects %s"
+
+#define QERR_INVALID_PASSWORD \
+ ERROR_CLASS_GENERIC_ERROR, "Password incorrect"
+
+#define QERR_IO_ERROR \
+ ERROR_CLASS_GENERIC_ERROR, "An IO error has occurred"
+
+#define QERR_JSON_PARSE_ERROR \
+ ERROR_CLASS_GENERIC_ERROR, "JSON parse error, %s"
+
+#define QERR_JSON_PARSING \
+ ERROR_CLASS_GENERIC_ERROR, "Invalid JSON syntax"
+
+#define QERR_KVM_MISSING_CAP \
+ ERROR_CLASS_K_V_M_MISSING_CAP, "Using KVM without %s, %s unavailable"
+
+#define QERR_MIGRATION_ACTIVE \
+ ERROR_CLASS_GENERIC_ERROR, "There's a migration process in progress"
+
+#define QERR_MIGRATION_NOT_SUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "State blocked by non-migratable device '%s'"
+
+#define QERR_MISSING_PARAMETER \
+ ERROR_CLASS_GENERIC_ERROR, "Parameter '%s' is missing"
+
+#define QERR_NO_BUS_FOR_DEVICE \
+ ERROR_CLASS_GENERIC_ERROR, "No '%s' bus found for device '%s'"
+
+#define QERR_NOT_SUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "Not supported"
+
+#define QERR_PERMISSION_DENIED \
+ ERROR_CLASS_GENERIC_ERROR, "Insufficient permission to perform this operation"
+
+#define QERR_PROPERTY_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' not found"
+
+#define QERR_PROPERTY_VALUE_BAD \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' doesn't take value '%s'"
+
+#define QERR_PROPERTY_VALUE_IN_USE \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' can't take value '%s', it's in use"
+
+#define QERR_PROPERTY_VALUE_NOT_FOUND \
+ ERROR_CLASS_GENERIC_ERROR, "Property '%s.%s' can't find value '%s'"
+
+#define QERR_PROPERTY_VALUE_NOT_POWER_OF_2 \
+ ERROR_CLASS_GENERIC_ERROR, "Property %s.%s doesn't take value '%" PRId64 "', it's not a power of 2"
+
+#define QERR_PROPERTY_VALUE_OUT_OF_RANGE \
+ ERROR_CLASS_GENERIC_ERROR, "Property %s.%s doesn't take value %" PRId64 " (minimum: %" PRId64 ", maximum: %" PRId64 ")"
+
+#define QERR_QGA_COMMAND_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Guest agent command failed, error was '%s'"
+
+#define QERR_QGA_LOGGING_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Guest agent failed to log non-optional log statement"
+
+#define QERR_QMP_BAD_INPUT_OBJECT \
+ ERROR_CLASS_GENERIC_ERROR, "Expected '%s' in QMP input"
+
+#define QERR_QMP_BAD_INPUT_OBJECT_MEMBER \
+ ERROR_CLASS_GENERIC_ERROR, "QMP input object member '%s' expects '%s'"
+
+#define QERR_QMP_EXTRA_MEMBER \
+ ERROR_CLASS_GENERIC_ERROR, "QMP input object member '%s' is unexpected"
+
+#define QERR_RESET_REQUIRED \
+ ERROR_CLASS_GENERIC_ERROR, "Resetting the Virtual Machine is required"
+
+#define QERR_SET_PASSWD_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Could not set password"
+
+#define QERR_TOO_MANY_FILES \
+ ERROR_CLASS_GENERIC_ERROR, "Too many open files"
+
+#define QERR_UNDEFINED_ERROR \
+ ERROR_CLASS_GENERIC_ERROR, "An undefined error has occurred"
+
+#define QERR_UNKNOWN_BLOCK_FORMAT_FEATURE \
+ ERROR_CLASS_GENERIC_ERROR, "'%s' uses a %s feature which is not supported by this qemu version: %s"
+
+#define QERR_UNSUPPORTED \
+ ERROR_CLASS_GENERIC_ERROR, "this feature or command is not currently supported"
+
+#define QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION \
+ ERROR_CLASS_GENERIC_ERROR, "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'"
+
+#define QERR_SOCKET_CONNECT_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to connect to socket"
+
+#define QERR_SOCKET_LISTEN_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to set socket to listening mode"
+
+#define QERR_SOCKET_BIND_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to bind socket"
+
+#define QERR_SOCKET_CREATE_FAILED \
+ ERROR_CLASS_GENERIC_ERROR, "Failed to create socket"
+
+#endif /* QERROR_H */
diff --git a/contrib/qemu/include/qapi/qmp/qfloat.h b/contrib/qemu/include/qapi/qmp/qfloat.h
new file mode 100644
index 000000000..a8658443d
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qfloat.h
@@ -0,0 +1,29 @@
+/*
+ * QFloat Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QFLOAT_H
+#define QFLOAT_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QFloat {
+ QObject_HEAD;
+ double value;
+} QFloat;
+
+QFloat *qfloat_from_double(double value);
+double qfloat_get_double(const QFloat *qi);
+QFloat *qobject_to_qfloat(const QObject *obj);
+
+#endif /* QFLOAT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qint.h b/contrib/qemu/include/qapi/qmp/qint.h
new file mode 100644
index 000000000..48a41b0f2
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qint.h
@@ -0,0 +1,28 @@
+/*
+ * QInt Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QINT_H
+#define QINT_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QInt {
+ QObject_HEAD;
+ int64_t value;
+} QInt;
+
+QInt *qint_from_int(int64_t value);
+int64_t qint_get_int(const QInt *qi);
+QInt *qobject_to_qint(const QObject *obj);
+
+#endif /* QINT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qjson.h b/contrib/qemu/include/qapi/qmp/qjson.h
new file mode 100644
index 000000000..73351ed6d
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qjson.h
@@ -0,0 +1,29 @@
+/*
+ * QObject JSON integration
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QJSON_H
+#define QJSON_H
+
+#include <stdarg.h>
+#include "qemu/compiler.h"
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qstring.h"
+
+QObject *qobject_from_json(const char *string) GCC_FMT_ATTR(1, 0);
+QObject *qobject_from_jsonf(const char *string, ...) GCC_FMT_ATTR(1, 2);
+QObject *qobject_from_jsonv(const char *string, va_list *ap) GCC_FMT_ATTR(1, 0);
+
+QString *qobject_to_json(const QObject *obj);
+QString *qobject_to_json_pretty(const QObject *obj);
+
+#endif /* QJSON_H */
diff --git a/contrib/qemu/include/qapi/qmp/qlist.h b/contrib/qemu/include/qapi/qmp/qlist.h
new file mode 100644
index 000000000..6cc4831df
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qlist.h
@@ -0,0 +1,63 @@
+/*
+ * QList Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QLIST_H
+#define QLIST_H
+
+#include "qapi/qmp/qobject.h"
+#include "qemu/queue.h"
+
+typedef struct QListEntry {
+ QObject *value;
+ QTAILQ_ENTRY(QListEntry) next;
+} QListEntry;
+
+typedef struct QList {
+ QObject_HEAD;
+ QTAILQ_HEAD(,QListEntry) head;
+} QList;
+
+#define qlist_append(qlist, obj) \
+ qlist_append_obj(qlist, QOBJECT(obj))
+
+#define QLIST_FOREACH_ENTRY(qlist, var) \
+ for ((var) = ((qlist)->head.tqh_first); \
+ (var); \
+ (var) = ((var)->next.tqe_next))
+
+static inline QObject *qlist_entry_obj(const QListEntry *entry)
+{ /* Accessor for the QObject stored in a list entry; entry must not be NULL */
+ return entry->value; /* returned as stored: the reference count is not modified here */
+}
+
+QList *qlist_new(void);
+QList *qlist_copy(QList *src);
+void qlist_append_obj(QList *qlist, QObject *obj);
+void qlist_iter(const QList *qlist,
+ void (*iter)(QObject *obj, void *opaque), void *opaque);
+QObject *qlist_pop(QList *qlist);
+QObject *qlist_peek(QList *qlist);
+int qlist_empty(const QList *qlist);
+size_t qlist_size(const QList *qlist);
+QList *qobject_to_qlist(const QObject *obj);
+
+static inline const QListEntry *qlist_first(const QList *qlist)
+{ /* First entry of the list's tail queue; presumably NULL when the list is empty - confirm against qemu/queue.h */
+ return QTAILQ_FIRST(&qlist->head);
+}
+
+static inline const QListEntry *qlist_next(const QListEntry *entry)
+{ /* Entry following 'entry' via its 'next' link; presumably NULL after the last entry - confirm against qemu/queue.h */
+ return QTAILQ_NEXT(entry, next);
+}
+
+#endif /* QLIST_H */
diff --git a/contrib/qemu/include/qapi/qmp/qobject.h b/contrib/qemu/include/qapi/qmp/qobject.h
new file mode 100644
index 000000000..9124649ed
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qobject.h
@@ -0,0 +1,112 @@
+/*
+ * QEMU Object Model.
+ *
+ * Based on ideas by Avi Kivity <avi@redhat.com>
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ * QObject Reference Counts Terminology
+ * ------------------------------------
+ *
+ * - Returning references: A function that returns an object may
+ * return it as either a weak or a strong reference. If the reference
+ * is strong, you are responsible for calling QDECREF() on the reference
+ * when you are done.
+ *
+ * If the reference is weak, the owner of the reference may free it at
+ * any time in the future. Before storing the reference anywhere, you
+ * should call QINCREF() to make the reference strong.
+ *
+ * - Transferring ownership: when you transfer ownership of a reference
+ * by calling a function, you are no longer responsible for calling
+ * QDECREF() when the reference is no longer needed. In other words,
+ * when the function returns you must behave as if the reference to the
+ * passed object was weak.
+ */
+#ifndef QOBJECT_H
+#define QOBJECT_H
+
+#include <stddef.h>
+#include <assert.h>
+
+typedef enum {
+ QTYPE_NONE,
+ QTYPE_QINT,
+ QTYPE_QSTRING,
+ QTYPE_QDICT,
+ QTYPE_QLIST,
+ QTYPE_QFLOAT,
+ QTYPE_QBOOL,
+ QTYPE_QERROR,
+} qtype_code;
+
+struct QObject;
+
+typedef struct QType {
+ qtype_code code;
+ void (*destroy)(struct QObject *);
+} QType;
+
+typedef struct QObject {
+ const QType *type;
+ size_t refcnt;
+} QObject;
+
+/* Objects definitions must include this */
+#define QObject_HEAD \
+ QObject base
+
+/* Get the 'base' part of an object */
+#define QOBJECT(obj) (&(obj)->base)
+
+/* High-level interface for qobject_incref() */
+#define QINCREF(obj) \
+ qobject_incref(QOBJECT(obj))
+
+/* High-level interface for qobject_decref(); NULL is forwarded as NULL. 'obj' is evaluated twice: no side effects */
+#define QDECREF(obj) \
+ qobject_decref((obj) ? QOBJECT(obj) : NULL)
+
+/* Initialize an object to default values (refcnt = 1, given type); expands to ONE statement, safe in unbraced if/else */
+#define QOBJECT_INIT(obj, qtype_type) \
+ do { (obj)->base.refcnt = 1; \
+ (obj)->base.type = (qtype_type); } while (0)
+
+/**
+ * qobject_incref(): Increment QObject's reference count; a NULL obj is ignored
+ */
+static inline void qobject_incref(QObject *obj)
+{
+ if (obj)
+ obj->refcnt++;
+}
+
+/**
+ * qobject_decref(): Decrement QObject's reference count and invoke the
+ * type's destroy() callback when it reaches zero; a NULL obj is ignored
+ */
+static inline void qobject_decref(QObject *obj)
+{
+ if (obj && --obj->refcnt == 0) {
+ assert(obj->type != NULL);
+ assert(obj->type->destroy != NULL);
+ obj->type->destroy(obj);
+ }
+}
+
+/**
+ * qobject_type(): Return the type code of a QObject; obj is dereferenced without a NULL check
+ */
+static inline qtype_code qobject_type(const QObject *obj)
+{
+ assert(obj->type != NULL);
+ return obj->type->code;
+}
+
+#endif /* QOBJECT_H */
diff --git a/contrib/qemu/include/qapi/qmp/qstring.h b/contrib/qemu/include/qapi/qmp/qstring.h
new file mode 100644
index 000000000..1bc366610
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/qstring.h
@@ -0,0 +1,36 @@
+/*
+ * QString Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QSTRING_H
+#define QSTRING_H
+
+#include <stdint.h>
+#include "qapi/qmp/qobject.h"
+
+typedef struct QString {
+ QObject_HEAD;
+ char *string;
+ size_t length;
+ size_t capacity;
+} QString;
+
+QString *qstring_new(void);
+QString *qstring_from_str(const char *str);
+QString *qstring_from_substr(const char *str, int start, int end);
+size_t qstring_get_length(const QString *qstring);
+const char *qstring_get_str(const QString *qstring);
+void qstring_append_int(QString *qstring, int64_t value);
+void qstring_append(QString *qstring, const char *str);
+void qstring_append_chr(QString *qstring, int c);
+QString *qobject_to_qstring(const QObject *obj);
+
+#endif /* QSTRING_H */
diff --git a/contrib/qemu/include/qapi/qmp/types.h b/contrib/qemu/include/qapi/qmp/types.h
new file mode 100644
index 000000000..7782ec5a6
--- /dev/null
+++ b/contrib/qemu/include/qapi/qmp/types.h
@@ -0,0 +1,25 @@
+/*
+ * Include all QEMU objects.
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef QEMU_OBJECTS_H
+#define QEMU_OBJECTS_H
+
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qjson.h"
+
+#endif /* QEMU_OBJECTS_H */
diff --git a/contrib/qemu/include/qemu-common.h b/contrib/qemu/include/qemu-common.h
new file mode 100644
index 000000000..6948bb917
--- /dev/null
+++ b/contrib/qemu/include/qemu-common.h
@@ -0,0 +1,478 @@
+
+/* Common header file that is included by all of QEMU.
+ *
+ * This file is supposed to be included only by .c files. No header file should
+ * depend on qemu-common.h, as this would easily lead to circular header
+ * dependencies.
+ *
+ * If a header file uses a definition from qemu-common.h, that definition
+ * must be moved to a separate header file, and the header that uses it
+ * must include that header.
+ */
+#ifndef QEMU_COMMON_H
+#define QEMU_COMMON_H
+
+#include "qemu/compiler.h"
+#include "config-host.h"
+#include "qemu/typedefs.h"
+
+#if defined(__arm__) || defined(__sparc__) || defined(__mips__) || defined(__hppa__) || defined(__ia64__)
+#define WORDS_ALIGNED
+#endif
+
+#define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR)
+
+/* we put basic includes here to avoid repeating them in device drivers */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <strings.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <time.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <assert.h>
+#include <signal.h>
+#include "glib-compat.h"
+
+#ifdef _WIN32
+#include "sysemu/os-win32.h"
+#endif
+
+#ifdef CONFIG_POSIX
+#include "sysemu/os-posix.h"
+#endif
+
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 0
+#endif
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#ifndef ENOMEDIUM
+#define ENOMEDIUM ENODEV
+#endif
+#if !defined(ENOTSUP)
+#define ENOTSUP 4096
+#endif
+#if !defined(ECANCELED)
+#define ECANCELED 4097
+#endif
+#if !defined(EMEDIUMTYPE)
+#define EMEDIUMTYPE 4098
+#endif
+#ifndef TIME_MAX
+#define TIME_MAX LONG_MAX
+#endif
+
+/* HOST_LONG_BITS is the size of a native pointer in bits. */
+#if UINTPTR_MAX == UINT32_MAX
+# define HOST_LONG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define HOST_LONG_BITS 64
+#else
+# error Unknown pointer size
+#endif
+
+typedef int (*fprintf_function)(FILE *f, const char *fmt, ...)
+ GCC_FMT_ATTR(2, 3);
+
+#ifdef _WIN32
+#define fsync _commit
+#if !defined(lseek)
+# define lseek _lseeki64
+#endif
+int qemu_ftruncate64(int, int64_t);
+#if !defined(ftruncate)
+# define ftruncate qemu_ftruncate64
+#endif
+
+/*
+ * Minimal realpath() replacement for Windows hosts, built on _fullpath().
+ *
+ * @path: path to convert
+ * @resolved_path: destination buffer handed to _fullpath() together with
+ *                 a size of _MAX_PATH
+ *
+ * Returns the result of _fullpath() itself rather than unconditionally
+ * echoing @resolved_path.  Per the Microsoft CRT documentation _fullpath()
+ * yields NULL on failure and, when given a NULL buffer, a buffer it
+ * allocated; forwarding its return value reports the failure to the caller
+ * and keeps such an allocated buffer reachable.
+ */
+static inline char *realpath(const char *path, char *resolved_path)
+{
+    return _fullpath(resolved_path, path, _MAX_PATH);
+}
+#endif
+
+/* icount */
+void configure_icount(const char *option);
+extern int use_icount;
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+
+/* FIXME: Remove NEED_CPU_H. */
+#ifdef NEED_CPU_H
+#include "cpu.h"
+#endif /* NEED_CPU_H */
+
+/* main function, renamed */
+#if defined(CONFIG_COCOA)
+int qemu_main(int argc, char **argv, char **envp);
+#endif
+
+void qemu_get_timedate(struct tm *tm, int offset);
+int qemu_timedate_diff(struct tm *tm);
+
+#if !GLIB_CHECK_VERSION(2, 20, 0)
+/*
+ * g_poll() is not implemented by GLib releases older than 2.20.0.  Supply
+ * a stand-in that forwards to the poll function of the default main
+ * context so the code still compiles on older systems.
+ */
+static inline gint g_poll(GPollFD *fds, guint nfds, gint timeout)
+{
+    return g_main_context_get_poll_func(g_main_context_default())(fds, nfds,
+                                                                  timeout);
+}
+#endif
+
+/**
+ * is_help_option:
+ * @s: string to test
+ *
+ * Check whether @s is one of the standard strings which indicate
+ * that the user is asking for a list of the valid values for a
+ * command option like -cpu or -M. The current accepted strings
+ * are 'help' and '?'. '?' is deprecated (it is a shell wildcard
+ * which makes it annoying to use in a reliable way) but provided
+ * for backwards compatibility.
+ *
+ * Returns: true if @s is a request for a list.
+ */
+static inline bool is_help_option(const char *s)
+{
+    /* "help" is the current spelling, "?" the deprecated alias */
+    if (strcmp(s, "help") == 0) {
+        return true;
+    }
+    return strcmp(s, "?") == 0;
+}
+
+/* cutils.c */
+void pstrcpy(char *buf, int buf_size, const char *str);
+void strpadcpy(char *buf, int buf_size, const char *str, char pad);
+char *pstrcat(char *buf, int buf_size, const char *s);
+int strstart(const char *str, const char *val, const char **ptr);
+int stristart(const char *str, const char *val, const char **ptr);
+int qemu_strnlen(const char *s, int max_len);
+char *qemu_strsep(char **input, const char *delim);
+time_t mktimegm(struct tm *tm);
+int qemu_fls(int i);
+int qemu_fdatasync(int fd);
+int fcntl_setfl(int fd, int flag);
+int qemu_parse_fd(const char *param);
+
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+ int base);
+int parse_uint_full(const char *s, unsigned long long *value, int base);
+
+/*
+ * strtosz() suffixes used to specify the default treatment of an
+ * argument passed to strtosz() without an explicit suffix.
+ * These should be defined using upper case characters in the range
+ * A-Z, as strtosz() will use qemu_toupper() on the given argument
+ * prior to comparison.
+ */
+#define STRTOSZ_DEFSUFFIX_EB 'E'
+#define STRTOSZ_DEFSUFFIX_PB 'P'
+#define STRTOSZ_DEFSUFFIX_TB 'T'
+#define STRTOSZ_DEFSUFFIX_GB 'G'
+#define STRTOSZ_DEFSUFFIX_MB 'M'
+#define STRTOSZ_DEFSUFFIX_KB 'K'
+#define STRTOSZ_DEFSUFFIX_B 'B'
+int64_t strtosz(const char *nptr, char **end);
+int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix);
+int64_t strtosz_suffix_unit(const char *nptr, char **end,
+ const char default_suffix, int64_t unit);
+
+/* path.c */
+void init_paths(const char *prefix);
+const char *path(const char *pathname);
+
+#define qemu_isalnum(c) isalnum((unsigned char)(c))
+#define qemu_isalpha(c) isalpha((unsigned char)(c))
+#define qemu_iscntrl(c) iscntrl((unsigned char)(c))
+#define qemu_isdigit(c) isdigit((unsigned char)(c))
+#define qemu_isgraph(c) isgraph((unsigned char)(c))
+#define qemu_islower(c) islower((unsigned char)(c))
+#define qemu_isprint(c) isprint((unsigned char)(c))
+#define qemu_ispunct(c) ispunct((unsigned char)(c))
+#define qemu_isspace(c) isspace((unsigned char)(c))
+#define qemu_isupper(c) isupper((unsigned char)(c))
+#define qemu_isxdigit(c) isxdigit((unsigned char)(c))
+#define qemu_tolower(c) tolower((unsigned char)(c))
+#define qemu_toupper(c) toupper((unsigned char)(c))
+#define qemu_isascii(c) isascii((unsigned char)(c))
+#define qemu_toascii(c) toascii((unsigned char)(c))
+
+void *qemu_oom_check(void *ptr);
+
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+ QEMU_WARN_UNUSED_RESULT;
+ssize_t qemu_send_full(int fd, const void *buf, size_t count, int flags)
+ QEMU_WARN_UNUSED_RESULT;
+ssize_t qemu_recv_full(int fd, void *buf, size_t count, int flags)
+ QEMU_WARN_UNUSED_RESULT;
+
+#ifndef _WIN32
+int qemu_pipe(int pipefd[2]);
+/* like openpty() but also makes it raw; return master fd */
+int qemu_openpty_raw(int *aslave, char *pty_name);
+#endif
+
+#ifdef _WIN32
+/* MinGW needs type casts for the 'buf' and 'optval' arguments. */
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+ getsockopt(sockfd, level, optname, (void *)optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+ setsockopt(sockfd, level, optname, (const void *)optval, optlen)
+#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, (void *)buf, len, flags)
+#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
+ sendto(sockfd, (const void *)buf, len, flags, destaddr, addrlen)
+#else
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+ getsockopt(sockfd, level, optname, optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+ setsockopt(sockfd, level, optname, optval, optlen)
+#define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
+#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
+ sendto(sockfd, buf, len, flags, destaddr, addrlen)
+#endif
+
+/* Error handling. */
+
+void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+
+struct ParallelIOArg {
+ void *buffer;
+ int count;
+};
+
+typedef int (*DMA_transfer_handler) (void *opaque, int nchan, int pos, int size);
+
+typedef uint64_t pcibus_t;
+
+typedef enum LostTickPolicy {
+ LOST_TICK_DISCARD,
+ LOST_TICK_DELAY,
+ LOST_TICK_MERGE,
+ LOST_TICK_SLEW,
+ LOST_TICK_MAX
+} LostTickPolicy;
+
+typedef struct PCIHostDeviceAddress {
+ unsigned int domain;
+ unsigned int bus;
+ unsigned int slot;
+ unsigned int function;
+} PCIHostDeviceAddress;
+
+void tcg_exec_init(unsigned long tb_size);
+bool tcg_enabled(void);
+
+void cpu_exec_init_all(void);
+
+/* CPU save/load. */
+#ifdef CPU_SAVE_VERSION
+void cpu_save(QEMUFile *f, void *opaque);
+int cpu_load(QEMUFile *f, void *opaque, int version_id);
+#endif
+
+/* Unblock cpu */
+void qemu_cpu_kick_self(void);
+
+/* work queue */
+struct qemu_work_item {
+ struct qemu_work_item *next;
+ void (*func)(void *data);
+ void *data;
+ int done;
+ bool free;
+};
+
+
+/**
+ * Sends a (part of) iovec down a socket, yielding when the socket is full, or
+ * Receives data into a (part of) iovec from a socket,
+ * yielding when there is no data in the socket.
+ * The same interface as qemu_sendv_recvv(), with added yielding.
+ * XXX should mark these as coroutine_fn
+ */
+ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes, bool do_send);
+#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \
+ qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false)
+#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \
+ qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true)
+
+/**
+ * The same as above, but with just a single buffer
+ */
+ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send);
+#define qemu_co_recv(sockfd, buf, bytes) \
+ qemu_co_send_recv(sockfd, buf, bytes, false)
+#define qemu_co_send(sockfd, buf, bytes) \
+ qemu_co_send_recv(sockfd, buf, bytes, true)
+
+typedef struct QEMUIOVector {
+ struct iovec *iov;
+ int niov;
+ int nalloc;
+ size_t size;
+} QEMUIOVector;
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint);
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov);
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len);
+void qemu_iovec_concat(QEMUIOVector *dst,
+ QEMUIOVector *src, size_t soffset, size_t sbytes);
+void qemu_iovec_concat_iov(QEMUIOVector *dst,
+ struct iovec *src_iov, unsigned int src_cnt,
+ size_t soffset, size_t sbytes);
+void qemu_iovec_destroy(QEMUIOVector *qiov);
+void qemu_iovec_reset(QEMUIOVector *qiov);
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+ void *buf, size_t bytes);
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+ const void *buf, size_t bytes);
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+ int fillc, size_t bytes);
+
+bool buffer_is_zero(const void *buf, size_t len);
+
+void qemu_progress_init(int enabled, float min_skip);
+void qemu_progress_end(void);
+void qemu_progress_print(float delta, int max);
+const char *qemu_get_vm_name(void);
+
+#define QEMU_FILE_TYPE_BIOS 0
+#define QEMU_FILE_TYPE_KEYMAP 1
+char *qemu_find_file(int type, const char *name);
+
+/* OS specific functions */
+void os_setup_early_signal_handling(void);
+char *os_find_datadir(const char *argv0);
+void os_parse_cmd_args(int index, const char *optarg);
+void os_pidfile_error(void);
+
+/* Convert a byte between binary and BCD. */
+static inline uint8_t to_bcd(uint8_t val)
+{
+    uint8_t tens = val / 10;
+    uint8_t ones = val % 10;
+
+    /* tens digit in the high nibble, ones digit in the low nibble */
+    return (tens << 4) | ones;
+}
+
+/* Decode a packed BCD byte: high nibble counts tens, low nibble ones. */
+static inline uint8_t from_bcd(uint8_t val)
+{
+    uint8_t high_digit = val >> 4;
+    uint8_t low_digit = val & 0x0f;
+
+    return high_digit * 10 + low_digit;
+}
+
+/*
+ * Compute (a * b) / c with a 96 bit intermediate result.
+ *
+ * @a: 64-bit multiplicand
+ * @b: 32-bit multiplier
+ * @c: 32-bit divisor; must be non-zero, since the function divides by it
+ *
+ * The two 32-bit halves of @a are extracted with shifts and masks instead
+ * of a union keyed on HOST_WORDS_BIGENDIAN, so the result no longer depends
+ * on that macro matching the byte order of the host.
+ *
+ * Returns the quotient as a 64-bit value; its upper and lower 32-bit halves
+ * are each truncated to 32 bits, as in the union-based formulation.
+ */
+static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
+{
+    uint64_t rl = (a & 0xffffffff) * (uint64_t)b;   /* low half  * b */
+    uint64_t rh = (a >> 32) * (uint64_t)b;          /* high half * b */
+    uint32_t high, low;
+
+    rh += (rl >> 32);   /* fold the carry of the low partial product */
+    high = rh / c;
+    low = (((rh % c) << 32) + (rl & 0xffffffff)) / c;
+    return ((uint64_t)high << 32) | low;
+}
+
+/* Round number down to multiple */
+#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/* Round number up to multiple */
+#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
+
+/* True when exactly one bit of @value is set; zero does not qualify. */
+static inline bool is_power_of_2(uint64_t value)
+{
+    /* value & (value - 1) clears the lowest set bit */
+    return value != 0 && (value & (value - 1)) == 0;
+}
+
+/* round down to the nearest power of 2 */
+int64_t pow2floor(int64_t value);
+
+#include "qemu/module.h"
+
+/*
+ * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+ * Input is limited to 14-bit numbers
+ */
+
+int uleb128_encode_small(uint8_t *out, uint32_t n);
+int uleb128_decode_small(const uint8_t *in, uint32_t *n);
+
+/* unicode.c */
+int mod_utf8_codepoint(const char *s, size_t n, char **end);
+
+/*
+ * Hexdump a buffer to a file. An optional string prefix is added to every line
+ */
+
+void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
+
+/* vector definitions */
+#ifdef __ALTIVEC__
+#include <altivec.h>
+/* The altivec.h header says we're allowed to undef these for
+ * C++ compatibility. Here we don't care about C++, but we
+ * undef them anyway to avoid namespace pollution.
+ */
+#undef vector
+#undef pixel
+#undef bool
+#define VECTYPE __vector unsigned char
+#define SPLAT(p) vec_splat(vec_ld(0, p), 0)
+#define ALL_EQ(v1, v2) vec_all_eq(v1, v2)
+/* altivec.h may redefine the bool macro as vector type.
+ * Reset it to POSIX semantics. */
+#define bool _Bool
+#elif defined __SSE2__
+#include <emmintrin.h>
+#define VECTYPE __m128i
+#define SPLAT(p) _mm_set1_epi8(*(p))
+#define ALL_EQ(v1, v2) (_mm_movemask_epi8(_mm_cmpeq_epi8(v1, v2)) == 0xFFFF)
+#else
+#define VECTYPE unsigned long
+#define SPLAT(p) (*(p) * (~0UL / 255))
+#define ALL_EQ(v1, v2) ((v1) == (v2))
+#endif
+
+#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR 8
+static inline bool
+can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+    const size_t chunk = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR
+                         * sizeof(VECTYPE);
+
+    /* the length has to be a whole number of unrolled vector chunks ... */
+    if (len % chunk != 0) {
+        return false;
+    }
+    /* ... and the buffer address a multiple of the vector size */
+    return ((uintptr_t) buf) % sizeof(VECTYPE) == 0;
+}
+size_t buffer_find_nonzero_offset(const void *buf, size_t len);
+
+/*
+ * helper to parse debug environment variables
+ */
+int parse_debug_env(const char *name, int max, int initial);
+
+#endif
diff --git a/contrib/qemu/include/qemu/aes.h b/contrib/qemu/include/qemu/aes.h
new file mode 100644
index 000000000..e79c70743
--- /dev/null
+++ b/contrib/qemu/include/qemu/aes.h
@@ -0,0 +1,45 @@
+#ifndef QEMU_AES_H
+#define QEMU_AES_H
+
+#define AES_MAXNR 14
+#define AES_BLOCK_SIZE 16
+
+struct aes_key_st {
+ uint32_t rd_key[4 *(AES_MAXNR + 1)];
+ int rounds;
+};
+typedef struct aes_key_st AES_KEY;
+
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key);
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned long length, const AES_KEY *key,
+ unsigned char *ivec, const int enc);
+
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
+
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+extern const uint32_t AES_Te0[256], AES_Te1[256], AES_Te2[256],
+ AES_Te3[256], AES_Te4[256];
+extern const uint32_t AES_Td0[256], AES_Td1[256], AES_Td2[256],
+ AES_Td3[256], AES_Td4[256];
+
+#endif
diff --git a/contrib/qemu/include/qemu/atomic.h b/contrib/qemu/include/qemu/atomic.h
new file mode 100644
index 000000000..0aa891330
--- /dev/null
+++ b/contrib/qemu/include/qemu/atomic.h
@@ -0,0 +1,202 @@
+/*
+ * Simple interface for atomic operations.
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef __QEMU_ATOMIC_H
+#define __QEMU_ATOMIC_H 1
+
+#include "qemu/compiler.h"
+
+/* For C11 atomic ops */
+
+/* Compiler barrier */
+#define barrier() ({ asm volatile("" ::: "memory"); (void)0; })
+
+#ifndef __ATOMIC_RELAXED
+
+/*
+ * We use GCC builtin if it's available, as that can use mfence on
+ * 32-bit as well, e.g. if built with -march=pentium-m. However, on
+ * i386 the spec is buggy, and the implementation followed it until
+ * 4.3 (http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36793).
+ */
+#if defined(__i386__) || defined(__x86_64__)
+#if !QEMU_GNUC_PREREQ(4, 4)
+#if defined __x86_64__
+#define smp_mb() ({ asm volatile("mfence" ::: "memory"); (void)0; })
+#else
+#define smp_mb() ({ asm volatile("lock; addl $0,0(%%esp) " ::: "memory"); (void)0; })
+#endif
+#endif
+#endif
+
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() asm volatile("mb":::"memory")
+#endif
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+
+/*
+ * Because of the strongly ordered storage model, wmb() and rmb() are nops
+ * here (a compiler barrier only). QEMU doesn't do accesses to write-combining
+ * qemu memory or non-temporal load/stores from C code.
+ */
+#define smp_wmb() barrier()
+#define smp_rmb() barrier()
+
+/*
+ * __sync_lock_test_and_set() is documented to be an acquire barrier only,
+ * but it is a full barrier at the hardware level. Add a compiler barrier
+ * to make it a full barrier also at the compiler level.
+ */
+#define atomic_xchg(ptr, i) (barrier(), __sync_lock_test_and_set(ptr, i))
+
+/*
+ * Load/store with Java volatile semantics.
+ */
+#define atomic_mb_set(ptr, i) ((void)atomic_xchg(ptr, i))
+
+#elif defined(_ARCH_PPC)
+
+/*
+ * We use an eieio() for wmb() on powerpc. This assumes we don't
+ * need to order cacheable and non-cacheable stores with respect to
+ * each other.
+ *
+ * smp_mb has the same problem as on x86 for not-very-new GCC
+ * (http://patchwork.ozlabs.org/patch/126184/, Nov 2011).
+ */
+#define smp_wmb() ({ asm volatile("eieio" ::: "memory"); (void)0; })
+#if defined(__powerpc64__)
+#define smp_rmb() ({ asm volatile("lwsync" ::: "memory"); (void)0; })
+#else
+#define smp_rmb() ({ asm volatile("sync" ::: "memory"); (void)0; })
+#endif
+#define smp_mb() ({ asm volatile("sync" ::: "memory"); (void)0; })
+
+#endif /* _ARCH_PPC */
+
+#endif /* C11 atomics */
+
+/*
+ * For (host) platforms we don't have explicit barrier definitions
+ * for, we use the gcc __sync_synchronize() primitive to generate a
+ * full barrier. This should be safe on all platforms, though it may
+ * be overkill for smp_wmb() and smp_rmb().
+ */
+#ifndef smp_mb
+#define smp_mb() __sync_synchronize()
+#endif
+
+#ifndef smp_wmb
+#ifdef __ATOMIC_RELEASE
+#define smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE)
+#else
+#define smp_wmb() __sync_synchronize()
+#endif
+#endif
+
+#ifndef smp_rmb
+#ifdef __ATOMIC_ACQUIRE
+#define smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE)
+#else
+#define smp_rmb() __sync_synchronize()
+#endif
+#endif
+
+#ifndef smp_read_barrier_depends
+#ifdef __ATOMIC_CONSUME
+#define smp_read_barrier_depends() __atomic_thread_fence(__ATOMIC_CONSUME)
+#else
+#define smp_read_barrier_depends() barrier()
+#endif
+#endif
+
+#ifndef atomic_read
+#define atomic_read(ptr) (*(__typeof__(*ptr) *volatile) (ptr))
+#endif
+
+#ifndef atomic_set
+#define atomic_set(ptr, i) ((*(__typeof__(*ptr) *volatile) (ptr)) = (i))
+#endif
+
+/* These have the same semantics as Java volatile variables.
+ * See http://gee.cs.oswego.edu/dl/jmm/cookbook.html:
+ * "1. Issue a StoreStore barrier (wmb) before each volatile store.
+ * 2. Issue a StoreLoad barrier after each volatile store.
+ * Note that you could instead issue one before each volatile load, but
+ * this would be slower for typical programs using volatiles in which
+ * reads greatly outnumber writes. Alternatively, if available, you
+ * can implement volatile store as an atomic instruction (for example
+ * XCHG on x86) and omit the barrier. This may be more efficient if
+ * atomic instructions are cheaper than StoreLoad barriers.
+ * 3. Issue LoadLoad and LoadStore barriers after each volatile load."
+ *
+ * If you prefer to think in terms of "pairing" of memory barriers,
+ * an atomic_mb_read pairs with an atomic_mb_set.
+ *
+ * And for the few ia64 lovers that exist, an atomic_mb_read is a ld.acq,
+ * while an atomic_mb_set is a st.rel followed by a memory barrier.
+ *
+ * These are a bit weaker than __atomic_load/store with __ATOMIC_SEQ_CST
+ * (see docs/atomics.txt), and I'm not sure that __ATOMIC_ACQ_REL is enough.
+ * Just always use the barriers manually by the rules above.
+ */
+#ifndef atomic_mb_read
+/*
+ * atomic_mb_read(ptr): load *ptr with atomic_read(), then issue smp_rmb();
+ * the expression evaluates to the loaded value.
+ *
+ * Spelled with __typeof__, like atomic_read() and atomic_set(), so the
+ * macro also expands when the plain typeof keyword is unavailable
+ * (e.g. when compiling with -std=c99 or -std=c11).
+ */
+#define atomic_mb_read(ptr)    ({               \
+    __typeof__(*ptr) _val = atomic_read(ptr);   \
+    smp_rmb();                                  \
+    _val;                                       \
+})
+#endif
+
+#ifndef atomic_mb_set
+#define atomic_mb_set(ptr, i) do { \
+ smp_wmb(); \
+ atomic_set(ptr, i); \
+ smp_mb(); \
+} while (0)
+#endif
+
+#ifndef atomic_xchg
+/*
+ * atomic_xchg(ptr, i): store i into *ptr and evaluate to the value *ptr
+ * held before the store.
+ */
+#ifdef __ATOMIC_SEQ_CST
+/*
+ * Uses __typeof__, like atomic_read() and atomic_set(), so the macro also
+ * expands when the plain typeof keyword is unavailable (strict ISO modes).
+ */
+#define atomic_xchg(ptr, i)    ({                           \
+    __typeof__(*ptr) _new = (i), _old;                      \
+    __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
+    _old;                                                   \
+})
+#elif defined __clang__
+/* clang names its full-barrier exchange builtin __sync_swap */
+#define atomic_xchg(ptr, i)    __sync_swap(ptr, i)
+#else
+/* __sync_lock_test_and_set() is documented to be an acquire barrier only. */
+#define atomic_xchg(ptr, i)    (smp_mb(), __sync_lock_test_and_set(ptr, i))
+#endif
+#endif
+
+/* Provide shorter names for GCC atomic builtins. */
+#define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1)
+#define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1)
+#define atomic_fetch_add __sync_fetch_and_add
+#define atomic_fetch_sub __sync_fetch_and_sub
+#define atomic_fetch_and __sync_fetch_and_and
+#define atomic_fetch_or __sync_fetch_and_or
+#define atomic_cmpxchg __sync_val_compare_and_swap
+
+/* And even shorter names that return void. */
+#define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1))
+#define atomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1))
+#define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n))
+#define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n))
+#define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n))
+#define atomic_or(ptr, n) ((void) __sync_fetch_and_or(ptr, n))
+
+#endif
diff --git a/contrib/qemu/include/qemu/bitmap.h b/contrib/qemu/include/qemu/bitmap.h
new file mode 100644
index 000000000..308bbb71e
--- /dev/null
+++ b/contrib/qemu/include/qemu/bitmap.h
@@ -0,0 +1,222 @@
+/*
+ * Bitmap Module
+ *
+ * Copyright (C) 2010 Corentin Chary <corentin.chary@gmail.com>
+ *
+ * Mostly inspired by (stolen from) linux/bitmap.h and linux/bitops.h
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BITMAP_H
+#define BITMAP_H
+
+#include "qemu-common.h"
+#include "qemu/bitops.h"
+
+/*
+ * The available bitmap operations and their rough meaning in the
+ * case that the bitmap is a single unsigned long are thus:
+ *
+ * Note that nbits should be always a compile time evaluable constant.
+ * Otherwise many inlines will generate horrible code.
+ *
+ * bitmap_zero(dst, nbits) *dst = 0UL
+ * bitmap_fill(dst, nbits) *dst = ~0UL
+ * bitmap_copy(dst, src, nbits) *dst = *src
+ * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2
+ * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2
+ * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2
+ * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2)
+ * bitmap_complement(dst, src, nbits) *dst = ~(*src)
+ * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal?
+ * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap?
+ * bitmap_empty(src, nbits) Are all bits zero in *src?
+ * bitmap_full(src, nbits) Are all bits set in *src?
+ * bitmap_set(dst, pos, nbits) Set specified bit area
+ * bitmap_clear(dst, pos, nbits) Clear specified bit area
+ * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
+ */
+
+/*
+ * Also the following operations apply to bitmaps.
+ *
+ * set_bit(bit, addr) *addr |= bit
+ * clear_bit(bit, addr) *addr &= ~bit
+ * change_bit(bit, addr) *addr ^= bit
+ * test_bit(bit, addr) Is bit set in *addr?
+ * test_and_set_bit(bit, addr) Set bit and return old value
+ * test_and_clear_bit(bit, addr) Clear bit and return old value
+ * test_and_change_bit(bit, addr) Change bit and return old value
+ * find_first_zero_bit(addr, nbits) Position first zero bit in *addr
+ * find_first_bit(addr, nbits) Position first set bit in *addr
+ * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit
+ * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit
+ */
+
+#define BITMAP_LAST_WORD_MASK(nbits) \
+ ( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+ )
+
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+#define small_nbits(nbits) \
+ ((nbits) <= BITS_PER_LONG)
+
+int slow_bitmap_empty(const unsigned long *bitmap, int bits);
+int slow_bitmap_full(const unsigned long *bitmap, int bits);
+int slow_bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int bits);
+void slow_bitmap_shift_right(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+void slow_bitmap_shift_left(unsigned long *dst,
+ const unsigned long *src, int shift, int bits);
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+
+/*
+ * Allocate a zero-filled bitmap with room for @nbits bits.
+ *
+ * The byte count is held in a size_t instead of being narrowed to int
+ * before it is passed to g_malloc0().
+ *
+ * Returns the buffer obtained from g_malloc0().
+ */
+static inline unsigned long *bitmap_new(int nbits)
+{
+    size_t len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+    return g_malloc0(len);
+}
+
+/* Clear the @nbits-bit bitmap at @dst. */
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+    int nbytes;
+
+    /* a bitmap that fits in one word is cleared with a single store */
+    if (small_nbits(nbits)) {
+        *dst = 0UL;
+        return;
+    }
+
+    nbytes = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+    memset(dst, 0, nbytes);
+}
+
+/*
+ * Set the bits of the @nbits-bit bitmap at @dst.
+ *
+ * Every word except the last is filled with 0xff bytes; the last word is
+ * assigned BITMAP_LAST_WORD_MASK(@nbits).
+ *
+ * A request with @nbits <= 0 is ignored: there is no last word to write,
+ * and computing dst[nlongs - 1] from a zero word count would index
+ * outside the buffer.
+ */
+static inline void bitmap_fill(unsigned long *dst, int nbits)
+{
+    size_t nlongs;
+
+    if (nbits <= 0) {
+        return;
+    }
+
+    nlongs = BITS_TO_LONGS(nbits);
+    if (!small_nbits(nbits)) {
+        size_t len = (nlongs - 1) * sizeof(unsigned long);
+        memset(dst, 0xff, len);
+    }
+    dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
+}
+
+/* Copy the @nbits-bit bitmap at @src into @dst. */
+static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
+                               int nbits)
+{
+    int nbytes;
+
+    /* a bitmap that fits in one word is copied with a single assignment */
+    if (small_nbits(nbits)) {
+        *dst = *src;
+        return;
+    }
+
+    nbytes = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+    memcpy(dst, src, nbytes);
+}
+
+/*
+ * *dst = *src1 & *src2.  For a single-word bitmap the return value is 1
+ * when the stored word is non-zero and 0 otherwise; larger bitmaps are
+ * delegated to slow_bitmap_and().
+ */
+static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
+                             const unsigned long *src2, int nbits)
+{
+    if (!small_nbits(nbits)) {
+        return slow_bitmap_and(dst, src1, src2, nbits);
+    }
+
+    *dst = *src1 & *src2;
+    return *dst != 0;
+}
+
+/* *dst = *src1 | *src2; multi-word bitmaps go through slow_bitmap_or(). */
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+                             const unsigned long *src2, int nbits)
+{
+    if (!small_nbits(nbits)) {
+        slow_bitmap_or(dst, src1, src2, nbits);
+        return;
+    }
+
+    *dst = *src1 | *src2;
+}
+
+/* *dst = *src1 ^ *src2; multi-word bitmaps go through slow_bitmap_xor(). */
+static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+                              const unsigned long *src2, int nbits)
+{
+    if (!small_nbits(nbits)) {
+        slow_bitmap_xor(dst, src1, src2, nbits);
+        return;
+    }
+
+    *dst = *src1 ^ *src2;
+}
+
+/*
+ * *dst = *src1 & ~(*src2).  For a single-word bitmap the return value is 1
+ * when the stored word is non-zero and 0 otherwise; larger bitmaps are
+ * delegated to slow_bitmap_andnot().
+ */
+static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+                                const unsigned long *src2, int nbits)
+{
+    if (!small_nbits(nbits)) {
+        return slow_bitmap_andnot(dst, src1, src2, nbits);
+    }
+
+    *dst = *src1 & ~(*src2);
+    return *dst != 0;
+}
+
+/*
+ * *dst = ~(*src).  In the single-word case the result is masked with
+ * BITMAP_LAST_WORD_MASK(nbits); larger bitmaps are delegated to
+ * slow_bitmap_complement().
+ */
+static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
+                                     int nbits)
+{
+    if (!small_nbits(nbits)) {
+        slow_bitmap_complement(dst, src, nbits);
+        return;
+    }
+
+    *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+}
+
+/*
+ * Are *src1 and *src2 equal?  In the single-word case only the bits
+ * selected by BITMAP_LAST_WORD_MASK(nbits) are compared; larger bitmaps
+ * are delegated to slow_bitmap_equal().
+ */
+static inline int bitmap_equal(const unsigned long *src1,
+                               const unsigned long *src2, int nbits)
+{
+    unsigned long differing;
+
+    if (!small_nbits(nbits)) {
+        return slow_bitmap_equal(src1, src2, nbits);
+    }
+
+    differing = (*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits);
+    return differing == 0;
+}
+
+/*
+ * Are all bits zero in *src?  In the single-word case only the bits
+ * selected by BITMAP_LAST_WORD_MASK(nbits) are examined; larger bitmaps
+ * are delegated to slow_bitmap_empty().
+ */
+static inline int bitmap_empty(const unsigned long *src, int nbits)
+{
+    unsigned long used;
+
+    if (!small_nbits(nbits)) {
+        return slow_bitmap_empty(src, nbits);
+    }
+
+    used = *src & BITMAP_LAST_WORD_MASK(nbits);
+    return used == 0;
+}
+
+/*
+ * Are all bits set in *src?  In the single-word case only the bits
+ * selected by BITMAP_LAST_WORD_MASK(nbits) are examined; larger bitmaps
+ * are delegated to slow_bitmap_full().
+ */
+static inline int bitmap_full(const unsigned long *src, int nbits)
+{
+    unsigned long missing;
+
+    if (!small_nbits(nbits)) {
+        return slow_bitmap_full(src, nbits);
+    }
+
+    missing = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
+    return missing == 0;
+}
+
+/*
+ * Do *src1 and *src2 overlap?  In the single-word case only the bits
+ * selected by BITMAP_LAST_WORD_MASK(nbits) are examined; larger bitmaps
+ * are delegated to slow_bitmap_intersects().
+ */
+static inline int bitmap_intersects(const unsigned long *src1,
+                                    const unsigned long *src2, int nbits)
+{
+    unsigned long common;
+
+    if (!small_nbits(nbits)) {
+        return slow_bitmap_intersects(src1, src2, nbits);
+    }
+
+    common = (*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits);
+    return common != 0;
+}
+
+void bitmap_set(unsigned long *map, int i, int len);
+void bitmap_clear(unsigned long *map, int start, int nr);
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask);
+
+#endif /* BITMAP_H */
diff --git a/contrib/qemu/include/qemu/bitops.h b/contrib/qemu/include/qemu/bitops.h
new file mode 100644
index 000000000..affcc969d
--- /dev/null
+++ b/contrib/qemu/include/qemu/bitops.h
@@ -0,0 +1,276 @@
+/*
+ * Bitops Module
+ *
+ * Copyright (C) 2010 Corentin Chary <corentin.chary@gmail.com>
+ *
+ * Mostly inspired by (stolen from) linux/bitmap.h and linux/bitops.h
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#ifndef BITOPS_H
+#define BITOPS_H
+
+#include "qemu-common.h"
+#include "host-utils.h"
+
+/* Width of a byte and of an unsigned long, in bits. */
+#define BITS_PER_BYTE CHAR_BIT
+#define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE)
+
+/* BIT(nr): unsigned long with only bit nr set. */
+#define BIT(nr) (1UL << (nr))
+/* BIT_MASK(nr): mask for bit nr inside the word that holds it. */
+#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+/* BIT_WORD(nr): index, in an array of longs, of the word holding bit nr. */
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+/* BITS_TO_LONGS(nr): nr divided by the bits in a long, via DIV_ROUND_UP. */
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+
+/**
+ * set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * @nr indexes into the array of longs at @addr: BIT_WORD() selects the
+ * word and BIT_MASK() the bit inside it.  The update is a plain
+ * read-modify-write of that word.
+ */
+static inline void set_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+
+ *p |= mask;
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * @nr indexes into the array of longs at @addr: BIT_WORD() selects the
+ * word and BIT_MASK() the bit inside it.  The update is a plain
+ * read-modify-write of that word.
+ */
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+
+ *p &= ~mask;
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * @nr indexes into the array of longs at @addr: BIT_WORD() selects the
+ * word and BIT_MASK() the bit inside it.  The bit is flipped with a plain
+ * read-modify-write XOR of that word.
+ */
+static inline void change_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+
+ *p ^= mask;
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * Returns 1 if the bit was already set, 0 otherwise.  The word is read
+ * into a local and written back as two separate plain accesses.
+ */
+static inline int test_and_set_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old | mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * Returns 1 if the bit was set before the call, 0 otherwise.  The word is
+ * read into a local and written back as two separate plain accesses.
+ */
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old & ~mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * Returns 1 if the bit was set before being toggled, 0 otherwise.  The
+ * word is read into a local and written back as two separate plain
+ * accesses.
+ */
+static inline int test_and_change_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = addr + BIT_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old ^ mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ *
+ * Returns 1 if the bit is set, 0 otherwise.  The word is selected with
+ * BIT_WORD() and shifted right so the requested bit lands in bit 0.
+ */
+static inline int test_bit(int nr, const unsigned long *addr)
+{
+ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the last set bit, or size.
+ */
+unsigned long find_last_bit(const unsigned long *addr,
+ unsigned long size);
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+unsigned long find_next_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ */
+
+unsigned long find_next_zero_bit(const unsigned long *addr,
+ unsigned long size,
+ unsigned long offset);
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first set bit.  This is find_next_bit()
+ * started at offset 0, so the value returned when no bit is set is
+ * whatever find_next_bit() returns in that case.
+ */
+static inline unsigned long find_first_bit(const unsigned long *addr,
+ unsigned long size)
+{
+ return find_next_bit(addr, size, 0);
+}
+
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit number of the first cleared bit.  This is
+ * find_next_zero_bit() started at offset 0, so the value returned when
+ * every bit is set is whatever find_next_zero_bit() returns in that case.
+ */
+static inline unsigned long find_first_zero_bit(const unsigned long *addr,
+ unsigned long size)
+{
+ return find_next_zero_bit(addr, size, 0);
+}
+
+/**
+ * hweight_long - count the set bits in a word
+ * @w: the word to inspect
+ *
+ * Returns the number of bits set in @w.
+ */
+static inline unsigned long hweight_long(unsigned long w)
+{
+ unsigned long count;
+
+ /* Add the low bit, then shift it out, until no set bits remain. */
+ for (count = 0; w; w >>= 1) {
+ count += w & 1;
+ }
+ return count;
+}
+
+/**
+ * extract32:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 32 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 32 bit word. It is valid to request that
+ * all 32 bits are returned (ie @length 32 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint32_t extract32(uint32_t value, int start, int length)
+{
+ assert(start >= 0 && length > 0 && length <= 32 - start);
+ /* ~0U >> (32 - length) leaves exactly @length low one bits; the assert
+ * guarantees length > 0, so the shift count stays below 32. */
+ return (value >> start) & (~0U >> (32 - length));
+}
+
+/**
+ * extract64:
+ * @value: the value to extract the bit field from
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ *
+ * Extract from the 64 bit input @value the bit field specified by the
+ * @start and @length parameters, and return it. The bit field must
+ * lie entirely within the 64 bit word. It is valid to request that
+ * all 64 bits are returned (ie @length 64 and @start 0).
+ *
+ * Returns: the value of the bit field extracted from the input value.
+ */
+static inline uint64_t extract64(uint64_t value, int start, int length)
+{
+ assert(start >= 0 && length > 0 && length <= 64 - start);
+ /* ~0ULL >> (64 - length) leaves exactly @length low one bits; the assert
+ * guarantees length > 0, so the shift count stays below 64. */
+ return (value >> start) & (~0ULL >> (64 - length));
+}
+
+/**
+ * deposit32:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 32 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 32 bit word.
+ * It is valid to request that all 32 bits are modified (ie @length
+ * 32 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint32_t deposit32(uint32_t value, int start, int length,
+ uint32_t fieldval)
+{
+ uint32_t mask;
+ assert(start >= 0 && length > 0 && length <= 32 - start);
+ /* @length one bits, moved up to bit position @start. */
+ mask = (~0U >> (32 - length)) << start;
+ /* Clear the field in @value, then merge in @fieldval limited to it. */
+ return (value & ~mask) | ((fieldval << start) & mask);
+}
+
+/**
+ * deposit64:
+ * @value: initial value to insert bit field into
+ * @start: the lowest bit in the bit field (numbered from 0)
+ * @length: the length of the bit field
+ * @fieldval: the value to insert into the bit field
+ *
+ * Deposit @fieldval into the 64 bit @value at the bit field specified
+ * by the @start and @length parameters, and return the modified
+ * @value. Bits of @value outside the bit field are not modified.
+ * Bits of @fieldval above the least significant @length bits are
+ * ignored. The bit field must lie entirely within the 64 bit word.
+ * It is valid to request that all 64 bits are modified (ie @length
+ * 64 and @start 0).
+ *
+ * Returns: the modified @value.
+ */
+static inline uint64_t deposit64(uint64_t value, int start, int length,
+ uint64_t fieldval)
+{
+ uint64_t mask;
+ assert(start >= 0 && length > 0 && length <= 64 - start);
+ /* @length one bits, moved up to bit position @start. */
+ mask = (~0ULL >> (64 - length)) << start;
+ /* Clear the field in @value, then merge in @fieldval limited to it. */
+ return (value & ~mask) | ((fieldval << start) & mask);
+}
+
+#endif
diff --git a/contrib/qemu/include/qemu/bswap.h b/contrib/qemu/include/qemu/bswap.h
new file mode 100644
index 000000000..14a5f657c
--- /dev/null
+++ b/contrib/qemu/include/qemu/bswap.h
@@ -0,0 +1,478 @@
+#ifndef BSWAP_H
+#define BSWAP_H
+
+#include "config-host.h"
+#include <inttypes.h>
+#include <limits.h>
+#include <string.h>
+#include "fpu/softfloat.h"
+
+#ifdef CONFIG_MACHINE_BSWAP_H
+# include <sys/endian.h>
+# include <sys/types.h>
+# include <machine/bswap.h>
+#elif defined(CONFIG_BYTESWAP_H)
+# include <byteswap.h>
+
+/* CONFIG_BYTESWAP_H variant: forward to bswap_16() from <byteswap.h>. */
+static inline uint16_t bswap16(uint16_t x)
+{
+ return bswap_16(x);
+}
+
+/* CONFIG_BYTESWAP_H variant: forward to bswap_32() from <byteswap.h>. */
+static inline uint32_t bswap32(uint32_t x)
+{
+ return bswap_32(x);
+}
+
+/* CONFIG_BYTESWAP_H variant: forward to bswap_64() from <byteswap.h>. */
+static inline uint64_t bswap64(uint64_t x)
+{
+ return bswap_64(x);
+}
+# else
+/* Generic fallback: exchange the two bytes of x with masks and shifts. */
+static inline uint16_t bswap16(uint16_t x)
+{
+ return (((x & 0x00ff) << 8) |
+ ((x & 0xff00) >> 8));
+}
+
+/* Generic fallback: reverse the four bytes of x with masks and shifts. */
+static inline uint32_t bswap32(uint32_t x)
+{
+ return (((x & 0x000000ffU) << 24) |
+ ((x & 0x0000ff00U) << 8) |
+ ((x & 0x00ff0000U) >> 8) |
+ ((x & 0xff000000U) >> 24));
+}
+
+/* Generic fallback: reverse the eight bytes of x with masks and shifts. */
+static inline uint64_t bswap64(uint64_t x)
+{
+ return (((x & 0x00000000000000ffULL) << 56) |
+ ((x & 0x000000000000ff00ULL) << 40) |
+ ((x & 0x0000000000ff0000ULL) << 24) |
+ ((x & 0x00000000ff000000ULL) << 8) |
+ ((x & 0x000000ff00000000ULL) >> 8) |
+ ((x & 0x0000ff0000000000ULL) >> 24) |
+ ((x & 0x00ff000000000000ULL) >> 40) |
+ ((x & 0xff00000000000000ULL) >> 56));
+}
+#endif /* ! CONFIG_MACHINE_BSWAP_H */
+
+/* Byte-swap the 16-bit value at *s in place. */
+static inline void bswap16s(uint16_t *s)
+{
+ *s = bswap16(*s);
+}
+
+/* Byte-swap the 32-bit value at *s in place. */
+static inline void bswap32s(uint32_t *s)
+{
+ *s = bswap32(*s);
+}
+
+/* Byte-swap the 64-bit value at *s in place. */
+static inline void bswap64s(uint64_t *s)
+{
+ *s = bswap64(*s);
+}
+
+/*
+ * Endian helpers private to this header (they are #undef'ed at its end).
+ * {le,be}_bswap(v, size) yields v converted between host order and the
+ * named order; {le,be}_bswaps(p, size) does the same in place on *p.
+ * The variant matching the host order is the identity / expands to
+ * nothing; the other one pastes "bswap" and size via glue().
+ */
+#if defined(HOST_WORDS_BIGENDIAN)
+#define be_bswap(v, size) (v)
+#define le_bswap(v, size) glue(bswap, size)(v)
+#define be_bswaps(v, size)
+#define le_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0)
+#else
+#define le_bswap(v, size) (v)
+#define be_bswap(v, size) glue(bswap, size)(v)
+#define le_bswaps(v, size)
+#define be_bswaps(p, size) do { *p = glue(bswap, size)(*p); } while(0)
+#endif
+
+/*
+ * CPU_CONVERT(endian, size, type) defines six static inline helpers for
+ * one byte order ("be" or "le") and one width:
+ *   <endian><size>_to_cpu(v)     - return v converted, by value
+ *   cpu_to_<endian><size>(v)     - return v converted, by value
+ *   <endian><size>_to_cpus(p)    - convert *p in place
+ *   cpu_to_<endian><size>s(p)    - convert *p in place
+ *   <endian><size>_to_cpup(p)    - load *p and return it converted
+ *   cpu_to_<endian><size>w(p, v) - store v converted into *p
+ * Both directions expand to the same <endian>_bswap/_bswaps macro.
+ */
+#define CPU_CONVERT(endian, size, type)\
+static inline type endian ## size ## _to_cpu(type v)\
+{\
+ return glue(endian, _bswap)(v, size);\
+}\
+\
+static inline type cpu_to_ ## endian ## size(type v)\
+{\
+ return glue(endian, _bswap)(v, size);\
+}\
+\
+static inline void endian ## size ## _to_cpus(type *p)\
+{\
+ glue(endian, _bswaps)(p, size);\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## s(type *p)\
+{\
+ glue(endian, _bswaps)(p, size);\
+}\
+\
+static inline type endian ## size ## _to_cpup(const type *p)\
+{\
+ return glue(glue(endian, size), _to_cpu)(*p);\
+}\
+\
+static inline void cpu_to_ ## endian ## size ## w(type *p, type v)\
+{\
+ *p = glue(glue(cpu_to_, endian), size)(v);\
+}
+
+CPU_CONVERT(be, 16, uint16_t)
+CPU_CONVERT(be, 32, uint32_t)
+CPU_CONVERT(be, 64, uint64_t)
+
+CPU_CONVERT(le, 16, uint16_t)
+CPU_CONVERT(le, 32, uint32_t)
+CPU_CONVERT(le, 64, uint64_t)
+
+/*
+ * Byte-swap the low @len bytes of @value: the whole 32-bit word is swapped
+ * and the result is shifted right by (32 - 8 * len) bits.
+ * len must be one of 1, 2, 4
+ */
+static inline uint32_t qemu_bswap_len(uint32_t value, int len)
+{
+ return bswap32(value) >> (32 - 8 * len);
+}
+
+/* Unions for reinterpreting between floats and integers. */
+
+typedef union {
+ float32 f;
+ uint32_t l;
+} CPU_FloatU;
+
+/*
+ * View of a float64 as one 64-bit integer (ll) or as two 32-bit halves
+ * (l.upper / l.lower).  The order of the halves in memory is chosen by
+ * HOST_WORDS_BIGENDIAN.
+ */
+typedef union {
+ float64 d;
+#if defined(HOST_WORDS_BIGENDIAN)
+ struct {
+ uint32_t upper;
+ uint32_t lower;
+ } l;
+#else
+ struct {
+ uint32_t lower;
+ uint32_t upper;
+ } l;
+#endif
+ uint64_t ll;
+} CPU_DoubleU;
+
+/*
+ * View of a floatx80 as a 64-bit lower part followed by a 16-bit upper
+ * part.  Unlike CPU_DoubleU and CPU_QuadU, the field order here does not
+ * depend on HOST_WORDS_BIGENDIAN.
+ */
+typedef union {
+ floatx80 d;
+ struct {
+ uint64_t lower;
+ uint16_t upper;
+ } l;
+} CPU_LDoubleU;
+
+/*
+ * View of a float128 as four 32-bit words (l) or two 64-bit words (ll).
+ * The order of the words in memory is chosen by HOST_WORDS_BIGENDIAN.
+ */
+typedef union {
+ float128 q;
+#if defined(HOST_WORDS_BIGENDIAN)
+ struct {
+ uint32_t upmost;
+ uint32_t upper;
+ uint32_t lower;
+ uint32_t lowest;
+ } l;
+ struct {
+ uint64_t upper;
+ uint64_t lower;
+ } ll;
+#else
+ struct {
+ uint32_t lowest;
+ uint32_t lower;
+ uint32_t upper;
+ uint32_t upmost;
+ } l;
+ struct {
+ uint64_t lower;
+ uint64_t upper;
+ } ll;
+#endif
+} CPU_QuadU;
+
+/* unaligned/endian-independent pointer access */
+
+/*
+ * the generic syntax is:
+ *
+ * load: ld{type}{sign}{size}{endian}_p(ptr)
+ *
+ * store: st{type}{size}{endian}_p(ptr, val)
+ *
+ * Note there are small differences with the softmmu access API!
+ *
+ * type is:
+ * (empty): integer access
+ * f : float access
+ *
+ * sign is:
+ * (empty): for floats or 32 bit size
+ * u : unsigned
+ * s : signed
+ *
+ * size is:
+ * b: 8 bits
+ * w: 16 bits
+ * l: 32 bits
+ * q: 64 bits
+ *
+ * endian is:
+ * (empty): host endian
+ * be : big endian
+ * le : little endian
+ */
+
+/*
+ * Load one unsigned byte from @ptr and return it as an int.
+ * NOTE(review): the cast drops the const qualifier of @ptr; casting to
+ * const uint8_t * would read the same byte without discarding it.
+ */
+static inline int ldub_p(const void *ptr)
+{
+ return *(uint8_t *)ptr;
+}
+
+/*
+ * Load one signed byte from @ptr and return it as an int.
+ * NOTE(review): the cast drops the const qualifier of @ptr, as in ldub_p().
+ */
+static inline int ldsb_p(const void *ptr)
+{
+ return *(int8_t *)ptr;
+}
+
+/* Store @v, converted to uint8_t, into the byte at @ptr. */
+static inline void stb_p(void *ptr, int v)
+{
+ *(uint8_t *)ptr = v;
+}
+
+/* Any compiler worth its salt will turn these memcpy into native unaligned
+ operations. Thus we don't need to play games with packed attributes, or
+ inline byte-by-byte stores. */
+
+/* Load a host-endian uint16_t from @ptr via memcpy; returned as int. */
+static inline int lduw_p(const void *ptr)
+{
+ uint16_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+/* Load a host-endian int16_t from @ptr via memcpy; returned as int. */
+static inline int ldsw_p(const void *ptr)
+{
+ int16_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+/* Store the 16-bit value @v at @ptr in host byte order via memcpy. */
+static inline void stw_p(void *ptr, uint16_t v)
+{
+ memcpy(ptr, &v, sizeof(v));
+}
+
+/* Load a host-endian int32_t from @ptr via memcpy; returned as int. */
+static inline int ldl_p(const void *ptr)
+{
+ int32_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+/* Store the 32-bit value @v at @ptr in host byte order via memcpy. */
+static inline void stl_p(void *ptr, uint32_t v)
+{
+ memcpy(ptr, &v, sizeof(v));
+}
+
+/* Load a host-endian uint64_t from @ptr via memcpy. */
+static inline uint64_t ldq_p(const void *ptr)
+{
+ uint64_t r;
+ memcpy(&r, ptr, sizeof(r));
+ return r;
+}
+
+/* Store the 64-bit value @v at @ptr in host byte order via memcpy. */
+static inline void stq_p(void *ptr, uint64_t v)
+{
+ memcpy(ptr, &v, sizeof(v));
+}
+
+/*
+ * Little-endian integer accessors.  Each one wraps the host-endian
+ * accessor of the same width with le_bswap(), which swaps bytes only when
+ * HOST_WORDS_BIGENDIAN is defined.
+ */
+
+/* Load an unsigned 16-bit little-endian value; the cast keeps 16 bits. */
+static inline int lduw_le_p(const void *ptr)
+{
+ return (uint16_t)le_bswap(lduw_p(ptr), 16);
+}
+
+/* Load a signed 16-bit little-endian value; the int16_t cast restores
+ * the sign after the unsigned load. */
+static inline int ldsw_le_p(const void *ptr)
+{
+ return (int16_t)le_bswap(lduw_p(ptr), 16);
+}
+
+/* Load a 32-bit little-endian value. */
+static inline int ldl_le_p(const void *ptr)
+{
+ return le_bswap(ldl_p(ptr), 32);
+}
+
+/* Load a 64-bit little-endian value. */
+static inline uint64_t ldq_le_p(const void *ptr)
+{
+ return le_bswap(ldq_p(ptr), 64);
+}
+
+/* Store the low 16 bits of @v at @ptr in little-endian order. */
+static inline void stw_le_p(void *ptr, int v)
+{
+ stw_p(ptr, le_bswap(v, 16));
+}
+
+/* Store the 32-bit value @v at @ptr in little-endian order. */
+static inline void stl_le_p(void *ptr, int v)
+{
+ stl_p(ptr, le_bswap(v, 32));
+}
+
+/* Store the 64-bit value @v at @ptr in little-endian order. */
+static inline void stq_le_p(void *ptr, uint64_t v)
+{
+ stq_p(ptr, le_bswap(v, 64));
+}
+
+/* float access */
+
+/* Load a little-endian float32: fetch the 32-bit pattern with ldl_le_p()
+ * and reinterpret it through CPU_FloatU. */
+static inline float32 ldfl_le_p(const void *ptr)
+{
+ CPU_FloatU u;
+ u.l = ldl_le_p(ptr);
+ return u.f;
+}
+
+/* Store a float32 in little-endian order: reinterpret it through
+ * CPU_FloatU and write the 32-bit pattern with stl_le_p(). */
+static inline void stfl_le_p(void *ptr, float32 v)
+{
+ CPU_FloatU u;
+ u.f = v;
+ stl_le_p(ptr, u.l);
+}
+
+/* Load a little-endian float64: fetch the 64-bit pattern with ldq_le_p()
+ * and reinterpret it through CPU_DoubleU. */
+static inline float64 ldfq_le_p(const void *ptr)
+{
+ CPU_DoubleU u;
+ u.ll = ldq_le_p(ptr);
+ return u.d;
+}
+
+/* Store a float64 in little-endian order: reinterpret it through
+ * CPU_DoubleU and write the 64-bit pattern with stq_le_p(). */
+static inline void stfq_le_p(void *ptr, float64 v)
+{
+ CPU_DoubleU u;
+ u.d = v;
+ stq_le_p(ptr, u.ll);
+}
+
+/*
+ * Big-endian integer accessors.  Each one wraps the host-endian accessor
+ * of the same width with be_bswap(), which swaps bytes only when
+ * HOST_WORDS_BIGENDIAN is not defined.
+ */
+
+/* Load an unsigned 16-bit big-endian value; the cast keeps 16 bits. */
+static inline int lduw_be_p(const void *ptr)
+{
+ return (uint16_t)be_bswap(lduw_p(ptr), 16);
+}
+
+/* Load a signed 16-bit big-endian value; the int16_t cast restores the
+ * sign after the unsigned load. */
+static inline int ldsw_be_p(const void *ptr)
+{
+ return (int16_t)be_bswap(lduw_p(ptr), 16);
+}
+
+/* Load a 32-bit big-endian value. */
+static inline int ldl_be_p(const void *ptr)
+{
+ return be_bswap(ldl_p(ptr), 32);
+}
+
+/* Load a 64-bit big-endian value. */
+static inline uint64_t ldq_be_p(const void *ptr)
+{
+ return be_bswap(ldq_p(ptr), 64);
+}
+
+/* Store the low 16 bits of @v at @ptr in big-endian order. */
+static inline void stw_be_p(void *ptr, int v)
+{
+ stw_p(ptr, be_bswap(v, 16));
+}
+
+/* Store the 32-bit value @v at @ptr in big-endian order. */
+static inline void stl_be_p(void *ptr, int v)
+{
+ stl_p(ptr, be_bswap(v, 32));
+}
+
+/* Store the 64-bit value @v at @ptr in big-endian order. */
+static inline void stq_be_p(void *ptr, uint64_t v)
+{
+ stq_p(ptr, be_bswap(v, 64));
+}
+
+/* float access */
+
+/* Load a big-endian float32: fetch the 32-bit pattern with ldl_be_p()
+ * and reinterpret it through CPU_FloatU. */
+static inline float32 ldfl_be_p(const void *ptr)
+{
+ CPU_FloatU u;
+ u.l = ldl_be_p(ptr);
+ return u.f;
+}
+
+/* Store a float32 in big-endian order: reinterpret it through CPU_FloatU
+ * and write the 32-bit pattern with stl_be_p(). */
+static inline void stfl_be_p(void *ptr, float32 v)
+{
+ CPU_FloatU u;
+ u.f = v;
+ stl_be_p(ptr, u.l);
+}
+
+/* Load a big-endian float64: fetch the 64-bit pattern with ldq_be_p()
+ * and reinterpret it through CPU_DoubleU. */
+static inline float64 ldfq_be_p(const void *ptr)
+{
+ CPU_DoubleU u;
+ u.ll = ldq_be_p(ptr);
+ return u.d;
+}
+
+/* Store a float64 in big-endian order: reinterpret it through CPU_DoubleU
+ * and write the 64-bit pattern with stq_be_p(). */
+static inline void stfq_be_p(void *ptr, float64 v)
+{
+ CPU_DoubleU u;
+ u.d = v;
+ stq_be_p(ptr, u.ll);
+}
+
+/* Legacy unaligned versions. Note that we never had a complete set. */
+
+/* Typed wrapper around stw_le_p(): store @v at @p as little-endian. */
+static inline void cpu_to_le16wu(uint16_t *p, uint16_t v)
+{
+ stw_le_p(p, v);
+}
+
+/* Typed wrapper around stl_le_p(): store @v at @p as little-endian. */
+static inline void cpu_to_le32wu(uint32_t *p, uint32_t v)
+{
+ stl_le_p(p, v);
+}
+
+/* Typed wrapper around lduw_le_p(): load a little-endian 16-bit value. */
+static inline uint16_t le16_to_cpupu(const uint16_t *p)
+{
+ return lduw_le_p(p);
+}
+
+/* Typed wrapper around ldl_le_p(): load a little-endian 32-bit value. */
+static inline uint32_t le32_to_cpupu(const uint32_t *p)
+{
+ return ldl_le_p(p);
+}
+
+/* Typed wrapper around ldl_be_p(): load a big-endian 32-bit value. */
+static inline uint32_t be32_to_cpupu(const uint32_t *p)
+{
+ return ldl_be_p(p);
+}
+
+/* Typed wrapper around stw_be_p(): store @v at @p as big-endian. */
+static inline void cpu_to_be16wu(uint16_t *p, uint16_t v)
+{
+ stw_be_p(p, v);
+}
+
+/* Typed wrapper around stl_be_p(): store @v at @p as big-endian. */
+static inline void cpu_to_be32wu(uint32_t *p, uint32_t v)
+{
+ stl_be_p(p, v);
+}
+
+/* Typed wrapper around stq_be_p(): store @v at @p as big-endian. */
+static inline void cpu_to_be64wu(uint64_t *p, uint64_t v)
+{
+ stq_be_p(p, v);
+}
+
+/* Typed wrapper around stl_p(): store @v at @p in host byte order. */
+static inline void cpu_to_32wu(uint32_t *p, uint32_t v)
+{
+ stl_p(p, v);
+}
+
+/*
+ * Convert a little-endian unsigned long to host byte order.  The swap
+ * width (32 or 64 bits) is picked at preprocessing time by comparing
+ * ULONG_MAX with UINT32_MAX / UINT64_MAX; any other width is a build error.
+ */
+static inline unsigned long leul_to_cpu(unsigned long v)
+{
+ /* In order to break an include loop between here and
+ qemu-common.h, don't rely on HOST_LONG_BITS. */
+#if ULONG_MAX == UINT32_MAX
+ return le_bswap(v, 32);
+#elif ULONG_MAX == UINT64_MAX
+ return le_bswap(v, 64);
+#else
+# error Unknown sizeof long
+#endif
+}
+
+#undef le_bswap
+#undef be_bswap
+#undef le_bswaps
+#undef be_bswaps
+
+#endif /* BSWAP_H */
diff --git a/contrib/qemu/include/qemu/compiler.h b/contrib/qemu/include/qemu/compiler.h
new file mode 100644
index 000000000..155b35896
--- /dev/null
+++ b/contrib/qemu/include/qemu/compiler.h
@@ -0,0 +1,55 @@
+/* public domain */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#include "config-host.h"
+
+/*----------------------------------------------------------------------------
+| The macro QEMU_GNUC_PREREQ tests for minimum version of the GNU C compiler.
+| The code is a copy of SOFTFLOAT_GNUC_PREREQ, see softfloat-macros.h.
+*----------------------------------------------------------------------------*/
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+# define QEMU_GNUC_PREREQ(maj, min) \
+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define QEMU_GNUC_PREREQ(maj, min) 0
+#endif
+
+#define QEMU_NORETURN __attribute__ ((__noreturn__))
+
+#if QEMU_GNUC_PREREQ(3, 4)
+#define QEMU_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define QEMU_WARN_UNUSED_RESULT
+#endif
+
+/*
+ * QEMU_PACKED: attribute for packed structures.  _WIN32 builds add
+ * gcc_struct to packed (presumably to keep GCC's own structure layout
+ * rules on that target -- confirm against the GCC documentation).
+ */
+#if defined(_WIN32)
+# define QEMU_PACKED __attribute__((gcc_struct, packed))
+#else
+# define QEMU_PACKED __attribute__((packed))
+#endif
+
+/*
+ * QEMU_BUILD_BUG_ON(x): break the build when x is true by declaring a
+ * typedef of a char array whose size is -1 (the size is 1 when x is
+ * false).  cat2() expands __LINE__ before cat() pastes it into the typedef
+ * name.  The expansion already ends with a semicolon.
+ */
+#define cat(x,y) x ## y
+#define cat2(x,y) cat(x,y)
+#define QEMU_BUILD_BUG_ON(x) \
+ typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1] __attribute__((unused));
+
+#if defined __GNUC__
+# if !QEMU_GNUC_PREREQ(4, 4)
+ /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */
+# define GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m)))
+# else
+ /* Use gnu_printf when supported (qemu uses standard format strings). */
+# define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+# if defined(_WIN32)
+ /* Map __printf__ to __gnu_printf__ because we want standard format strings
+ * even when MinGW or GLib include files use __printf__. */
+# define __printf__ __gnu_printf__
+# endif
+# endif
+#else
+#define GCC_FMT_ATTR(n, m)
+#endif
+
+#endif /* COMPILER_H */
diff --git a/contrib/qemu/include/qemu/error-report.h b/contrib/qemu/include/qemu/error-report.h
new file mode 100644
index 000000000..3b098a917
--- /dev/null
+++ b/contrib/qemu/include/qemu/error-report.h
@@ -0,0 +1,46 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_ERROR_H
+#define QEMU_ERROR_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include "qemu/compiler.h"
+
+/*
+ * One entry of a chain of locations linked through prev.  kind selects
+ * between no location, a command-line location and a file location;
+ * how num and ptr are interpreted for each kind is defined by the
+ * implementation, not by this header.
+ */
+typedef struct Location {
+ /* all members are private to qemu-error.c */
+ enum { LOC_NONE, LOC_CMDLINE, LOC_FILE } kind;
+ int num;
+ const void *ptr;
+ struct Location *prev;
+} Location;
+
+Location *loc_push_restore(Location *loc);
+Location *loc_push_none(Location *loc);
+Location *loc_pop(Location *loc);
+Location *loc_save(Location *loc);
+void loc_restore(Location *loc);
+void loc_set_none(void);
+void loc_set_cmdline(char **argv, int idx, int cnt);
+void loc_set_file(const char *fname, int lno);
+
+void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
+void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+void error_print_loc(void);
+void error_set_progname(const char *argv0);
+void error_report(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
+const char *error_get_progname(void);
+extern bool enable_timestamp_msg;
+
+#endif
diff --git a/contrib/qemu/include/qemu/event_notifier.h b/contrib/qemu/include/qemu/event_notifier.h
new file mode 100644
index 000000000..88b57af7c
--- /dev/null
+++ b/contrib/qemu/include/qemu/event_notifier.h
@@ -0,0 +1,46 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_EVENT_NOTIFIER_H
+#define QEMU_EVENT_NOTIFIER_H
+
+#include "qemu-common.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+
+/*
+ * Platform-specific state of an event notifier: a HANDLE on _WIN32
+ * builds, otherwise two file descriptors (presumably the read and write
+ * ends of the notification channel -- confirm in the implementation).
+ *
+ * NOTE(review): the layout is selected with _WIN32 while the accessor
+ * prototypes in this header are selected with CONFIG_POSIX; the two
+ * conditions are assumed to be complementary.
+ */
+struct EventNotifier {
+#ifdef _WIN32
+ HANDLE event;
+#else
+ int rfd;
+ int wfd;
+#endif
+};
+
+typedef void EventNotifierHandler(EventNotifier *);
+
+int event_notifier_init(EventNotifier *, int active);
+void event_notifier_cleanup(EventNotifier *);
+int event_notifier_set(EventNotifier *);
+int event_notifier_test_and_clear(EventNotifier *);
+int event_notifier_set_handler(EventNotifier *, EventNotifierHandler *);
+
+#ifdef CONFIG_POSIX
+void event_notifier_init_fd(EventNotifier *, int fd);
+int event_notifier_get_fd(EventNotifier *);
+#else
+HANDLE event_notifier_get_handle(EventNotifier *);
+#endif
+
+#endif
diff --git a/contrib/qemu/include/qemu/hbitmap.h b/contrib/qemu/include/qemu/hbitmap.h
new file mode 100644
index 000000000..550d7ce2c
--- /dev/null
+++ b/contrib/qemu/include/qemu/hbitmap.h
@@ -0,0 +1,209 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef HBITMAP_H
+#define HBITMAP_H 1
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "bitops.h"
+#include "host-utils.h"
+
+typedef struct HBitmap HBitmap;
+typedef struct HBitmapIter HBitmapIter;
+
+#define BITS_PER_LEVEL (BITS_PER_LONG == 32 ? 5 : 6)
+
+/* For 32-bit, the largest that fits in a 4 GiB address space.
+ * For 64-bit, the number of sectors in 1 PiB. Good luck, in
+ * either case... :)
+ */
+#define HBITMAP_LOG_MAX_SIZE (BITS_PER_LONG == 32 ? 34 : 41)
+
+/* We need to place a sentinel in level 0 to speed up iteration. Thus,
+ * we do this instead of HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL. The
+ * difference is that it allocates an extra level when HBITMAP_LOG_MAX_SIZE
+ * is an exact multiple of BITS_PER_LEVEL.
+ */
+#define HBITMAP_LEVELS ((HBITMAP_LOG_MAX_SIZE / BITS_PER_LEVEL) + 1)
+
+struct HBitmapIter {
+ const HBitmap *hb;
+
+ /* Copied from hb for access in the inline functions (hb is opaque). */
+ int granularity;
+
+ /* Entry offset into the last-level array of longs. */
+ size_t pos;
+
+ /* The currently-active path in the tree. Each item of cur[i] stores
+ * the bits (i.e. the subtrees) yet to be processed under that node.
+ */
+ unsigned long cur[HBITMAP_LEVELS];
+};
+
+/**
+ * hbitmap_alloc:
+ * @size: Number of bits in the bitmap.
+ * @granularity: Granularity of the bitmap. Aligned groups of 2^@granularity
+ * bits will be represented by a single bit. Each operation on a
+ * range of bits first rounds the bits to determine which group they land
+ * in, and then affect the entire set; iteration will only visit the first
+ * bit of each group.
+ *
+ * Allocate a new HBitmap.
+ */
+HBitmap *hbitmap_alloc(uint64_t size, int granularity);
+
+/**
+ * hbitmap_empty:
+ * @hb: HBitmap to operate on.
+ *
+ * Return whether the bitmap is empty.
+ */
+bool hbitmap_empty(const HBitmap *hb);
+
+/**
+ * hbitmap_granularity:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the granularity of the HBitmap.
+ */
+int hbitmap_granularity(const HBitmap *hb);
+
+/**
+ * hbitmap_count:
+ * @hb: HBitmap to operate on.
+ *
+ * Return the number of bits set in the HBitmap.
+ */
+uint64_t hbitmap_count(const HBitmap *hb);
+
+/**
+ * hbitmap_set:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to set (0-based).
+ * @count: Number of bits to set.
+ *
+ * Set a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_reset:
+ * @hb: HBitmap to operate on.
+ * @start: First bit to reset (0-based).
+ * @count: Number of bits to reset.
+ *
+ * Reset a consecutive range of bits in an HBitmap.
+ */
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count);
+
+/**
+ * hbitmap_get:
+ * @hb: HBitmap to operate on.
+ * @item: Bit to query (0-based).
+ *
+ * Return whether the @item-th bit in an HBitmap is set.
+ */
+bool hbitmap_get(const HBitmap *hb, uint64_t item);
+
+/**
+ * hbitmap_free:
+ * @hb: HBitmap to operate on.
+ *
+ * Free an HBitmap and all of its associated memory.
+ */
+void hbitmap_free(HBitmap *hb);
+
+/**
+ * hbitmap_iter_init:
+ * @hbi: HBitmapIter to initialize.
+ * @hb: HBitmap to iterate on.
+ * @first: First bit to visit (0-based, must be strictly less than the
+ * size of the bitmap).
+ *
+ * Set up @hbi to iterate on the HBitmap @hb. hbitmap_iter_next will return
+ * the lowest-numbered bit that is set in @hb, starting at @first.
+ *
+ * Concurrent setting of bits is acceptable, and will at worst cause the
+ * iteration to miss some of those bits. Resetting bits before the current
+ * position of the iterator is also okay. However, concurrent resetting of
+ * bits can lead to unexpected behavior if the iterator has not yet reached
+ * those bits.
+ */
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first);
+
+/* hbitmap_iter_skip_words:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Internal function used by hbitmap_iter_next and hbitmap_iter_next_word.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi);
+
+/**
+ * hbitmap_iter_next:
+ * @hbi: HBitmapIter to operate on.
+ *
+ * Return the next bit that is set in @hbi's associated HBitmap,
+ * or -1 if all remaining bits are zero.  The returned index is scaled
+ * up by the iterator's granularity.
+ */
+static inline int64_t hbitmap_iter_next(HBitmapIter *hbi)
+{
+ unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+ int64_t item;
+
+ if (cur == 0) {
+ /* Nothing pending in the last-level word: ask for the next one. */
+ cur = hbitmap_iter_skip_words(hbi);
+ if (cur == 0) {
+ return -1;
+ }
+ }
+
+ /* The next call will resume work from the next bit. */
+ /* cur & (cur - 1) clears the lowest set bit of cur. */
+ hbi->cur[HBITMAP_LEVELS - 1] = cur & (cur - 1);
+ /* Word position times bits per word, plus ctzl(cur) (presumably the
+ * index of the lowest set bit; ctzl is defined outside this header). */
+ item = ((uint64_t)hbi->pos << BITS_PER_LEVEL) + ctzl(cur);
+
+ return item << hbi->granularity;
+}
+
+/**
+ * hbitmap_iter_next_word:
+ * @hbi: HBitmapIter to operate on.
+ * @p_cur: Location where to store the next non-zero word.
+ *
+ * Return the index of the next nonzero word that is set in @hbi's
+ * associated HBitmap, and set *p_cur to the content of that word
+ * (bits before the index that was passed to hbitmap_iter_init are
+ * trimmed on the first call). Return -1, and set *p_cur to zero,
+ * if all remaining words are zero.
+ *
+ * The return type is size_t, so the -1 above reaches the caller as
+ * (size_t)-1 and must be compared in that form.
+ */
+static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_cur)
+{
+ unsigned long cur = hbi->cur[HBITMAP_LEVELS - 1];
+
+ if (cur == 0) {
+ /* Nothing pending in the last-level word: ask for the next one. */
+ cur = hbitmap_iter_skip_words(hbi);
+ if (cur == 0) {
+ *p_cur = 0;
+ return -1;
+ }
+ }
+
+ /* The next call will resume work from the next word. */
+ hbi->cur[HBITMAP_LEVELS - 1] = 0;
+ *p_cur = cur;
+ return hbi->pos;
+}
+
+
+#endif
diff --git a/contrib/qemu/include/qemu/host-utils.h b/contrib/qemu/include/qemu/host-utils.h
new file mode 100644
index 000000000..0f688c1c0
--- /dev/null
+++ b/contrib/qemu/include/qemu/host-utils.h
@@ -0,0 +1,322 @@
+/*
+ * Utility compute operations used by translated code.
+ *
+ * Copyright (c) 2007 Thiemo Seufer
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef HOST_UTILS_H
+#define HOST_UTILS_H 1
+
+#include "qemu/compiler.h" /* QEMU_GNUC_PREREQ */
+#include <limits.h>
+
+/*
+ * mulu64 / muls64 - 64x64 -> 128-bit multiplication.
+ * @plow:  receives the low 64 bits of the product
+ * @phigh: receives the high 64 bits of the product
+ * @a, @b: the factors (unsigned for mulu64, signed for muls64)
+ *
+ * With CONFIG_INT128 the product is computed inline using the compiler's
+ * 128-bit integer types.  Otherwise only prototypes are provided and the
+ * implementation is expected elsewhere.  Both variants must take the
+ * output pointers in the same order, because callers pass them
+ * positionally.
+ */
+#ifdef CONFIG_INT128
+static inline void mulu64(uint64_t *plow, uint64_t *phigh,
+                          uint64_t a, uint64_t b)
+{
+    __uint128_t r = (__uint128_t)a * b;
+    *plow = r;          /* truncates to the low 64 bits */
+    *phigh = r >> 64;
+}
+
+static inline void muls64(uint64_t *plow, uint64_t *phigh,
+                          int64_t a, int64_t b)
+{
+    __int128_t r = (__int128_t)a * b;
+    *plow = r;          /* truncates to the low 64 bits */
+    *phigh = r >> 64;
+}
+#else
+/*
+ * Parameter names follow the inline variants above (low half first).
+ * NOTE(review): the out-of-line definitions are not visible here; confirm
+ * they store the low half through the first pointer.
+ */
+void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
+void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
+#endif
+
+/**
+ * clz32 - count leading zeros in a 32-bit value.
+ * @val: The value to search
+ *
+ * Returns 32 if the value is zero.  The zero case is handled explicitly
+ * because the GCC builtin is undefined for a zero argument.
+ */
+static inline int clz32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return val ? __builtin_clz(val) : 32;
+#else
+    /*
+     * Halving search for the leading one bit: whenever the top `step'
+     * bits are all clear, count them and shift them out.
+     */
+    int zeros = 0;
+    int step;
+
+    for (step = 16; step > 0; step >>= 1) {
+        if ((val >> (32 - step)) == 0) {
+            zeros += step;
+            val <<= step;
+        }
+    }
+    /* The top bit is still clear here only when the input was zero. */
+    if (!(val & 0x80000000U)) {
+        zeros++;
+    }
+    return zeros;
+#endif
+}
+
+/**
+ * clo32 - count leading ones in a 32-bit value.
+ * @val: The value to search
+ *
+ * Computed as the leading-zero count of the bitwise complement, so an
+ * all-ones input (-1) returns 32.
+ */
+static inline int clo32(uint32_t val)
+{
+    uint32_t inverted = ~val;
+
+    return clz32(inverted);
+}
+
+/**
+ * clz64 - count leading zeros in a 64-bit value.
+ * @val: The value to search
+ *
+ * Returns 64 if the value is zero.  The zero case is handled explicitly
+ * because the GCC builtin is undefined for a zero argument.
+ */
+static inline int clz64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return val ? __builtin_clzll(val) : 64;
+#else
+    uint32_t upper = val >> 32;
+
+    /* A non-zero upper half decides the result on its own. */
+    if (upper) {
+        return clz32(upper);
+    }
+    /* Upper half is all zeros: 32 plus the count for the lower half. */
+    return 32 + clz32((uint32_t)val);
+#endif
+}
+
+/**
+ * clo64 - count leading ones in a 64-bit value.
+ * @val: The value to search
+ *
+ * Computed as the leading-zero count of the bitwise complement, so an
+ * all-ones input (-1) returns 64.
+ */
+static inline int clo64(uint64_t val)
+{
+    uint64_t inverted = ~val;
+
+    return clz64(inverted);
+}
+
+/**
+ * ctz32 - count trailing zeros in a 32-bit value.
+ * @val: The value to search
+ *
+ * Returns 32 if the value is zero.  The zero case is handled explicitly
+ * because the GCC builtin is undefined for a zero argument.
+ */
+static inline int ctz32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return val ? __builtin_ctz(val) : 32;
+#else
+    /*
+     * Halving search for the trailing one bit: whenever the bottom `step'
+     * bits are all clear, count them and shift them out.
+     */
+    int zeros = 0;
+    int step;
+
+    for (step = 16; step > 0; step >>= 1) {
+        if ((val & ((1U << step) - 1)) == 0) {
+            zeros += step;
+            val >>= step;
+        }
+    }
+    /* The bottom bit is still clear here only when the input was zero. */
+    if (!(val & 1U)) {
+        zeros++;
+    }
+
+    return zeros;
+#endif
+}
+
+/**
+ * cto32 - count trailing ones in a 32-bit value.
+ * @val: The value to search
+ *
+ * Computed as the trailing-zero count of the bitwise complement, so an
+ * all-ones input (-1) returns 32.
+ */
+static inline int cto32(uint32_t val)
+{
+    uint32_t inverted = ~val;
+
+    return ctz32(inverted);
+}
+
+/**
+ * ctz64 - count trailing zeros in a 64-bit value.
+ * @val: The value to search
+ *
+ * Returns 64 if the value is zero.  The zero case is handled explicitly
+ * because the GCC builtin is undefined for a zero argument.
+ */
+static inline int ctz64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return val ? __builtin_ctzll(val) : 64;
+#else
+    uint32_t lower = (uint32_t)val;
+
+    /* A non-zero lower half decides the result on its own. */
+    if (lower) {
+        return ctz32(lower);
+    }
+    /* Lower half is all zeros: 32 plus the count for the upper half. */
+    return 32 + ctz32((uint32_t)(val >> 32));
+#endif
+}
+
+/**
+ * cto64 - count trailing ones in a 64-bit value.
+ * @val: The value to search
+ *
+ * Computed as the trailing-zero count of the bitwise complement, so an
+ * all-ones input (-1) returns 64.
+ */
+static inline int cto64(uint64_t val)
+{
+    uint64_t inverted = ~val;
+
+    return ctz64(inverted);
+}
+
+/**
+ * ctpop8 - count the population of one bits in an 8-bit value.
+ * @val: The value to search
+ *
+ * Returns the number of bits set in @val (0..8).
+ */
+static inline int ctpop8(uint8_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return __builtin_popcount(val);
+#else
+    /* Sum adjacent bit fields, doubling the field width on each pass. */
+    const uint8_t masks[] = { 0x55, 0x33, 0x0f };
+    unsigned int pass;
+
+    for (pass = 0; pass < sizeof(masks) / sizeof(masks[0]); pass++) {
+        val = (val & masks[pass]) + ((val >> (1 << pass)) & masks[pass]);
+    }
+
+    return val;
+#endif
+}
+
+/**
+ * ctpop16 - count the population of one bits in a 16-bit value.
+ * @val: The value to search
+ *
+ * Returns the number of bits set in @val (0..16).
+ */
+static inline int ctpop16(uint16_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return __builtin_popcount(val);
+#else
+    /* Sum adjacent bit fields, doubling the field width on each pass. */
+    const uint16_t masks[] = { 0x5555, 0x3333, 0x0f0f, 0x00ff };
+    unsigned int pass;
+
+    for (pass = 0; pass < sizeof(masks) / sizeof(masks[0]); pass++) {
+        val = (val & masks[pass]) + ((val >> (1 << pass)) & masks[pass]);
+    }
+
+    return val;
+#endif
+}
+
+/**
+ * ctpop32 - count the population of one bits in a 32-bit value.
+ * @val: The value to search
+ *
+ * Returns the number of bits set in @val (0..32).
+ */
+static inline int ctpop32(uint32_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return __builtin_popcount(val);
+#else
+    /* Sum adjacent bit fields, doubling the field width on each pass. */
+    const uint32_t masks[] = {
+        0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff
+    };
+    unsigned int pass;
+
+    for (pass = 0; pass < sizeof(masks) / sizeof(masks[0]); pass++) {
+        val = (val & masks[pass]) + ((val >> (1 << pass)) & masks[pass]);
+    }
+
+    return val;
+#endif
+}
+
+/**
+ * ctpop64 - count the population of one bits in a 64-bit value.
+ * @val: The value to search
+ *
+ * Returns the number of bits set in @val (0..64).
+ */
+static inline int ctpop64(uint64_t val)
+{
+#if QEMU_GNUC_PREREQ(3, 4)
+    return __builtin_popcountll(val);
+#else
+    /* Sum adjacent bit fields, doubling the field width on each pass. */
+    const uint64_t masks[] = {
+        0x5555555555555555ULL, 0x3333333333333333ULL,
+        0x0f0f0f0f0f0f0f0fULL, 0x00ff00ff00ff00ffULL,
+        0x0000ffff0000ffffULL, 0x00000000ffffffffULL
+    };
+    unsigned int pass;
+
+    for (pass = 0; pass < sizeof(masks) / sizeof(masks[0]); pass++) {
+        val = (val & masks[pass]) + ((val >> (1 << pass)) & masks[pass]);
+    }
+
+    return val;
+#endif
+}
+
+/* Host type specific sizes of these routines. */
+
+/*
+ * Map the `long'-sized names (clzl, ctzl, clol, ctol, ctpopl) onto the
+ * 32-bit or 64-bit routines above by comparing ULONG_MAX with the
+ * fixed-width limits; any other width is rejected at compile time.
+ * NOTE(review): UINT32_MAX and UINT64_MAX come from <stdint.h>, which this
+ * header does not include directly -- presumably an earlier include
+ * provides it; confirm, since an undefined macro evaluates to 0 in #if
+ * and both comparisons would then fall through to the #error.
+ */
+#if ULONG_MAX == UINT32_MAX
+# define clzl clz32
+# define ctzl ctz32
+# define clol clo32
+# define ctol cto32
+# define ctpopl ctpop32
+#elif ULONG_MAX == UINT64_MAX
+# define clzl clz64
+# define ctzl ctz64
+# define clol clo64
+# define ctol cto64
+# define ctpopl ctpop64
+#else
+# error Unknown sizeof long
+#endif
+
+#endif
diff --git a/contrib/qemu/include/qemu/iov.h b/contrib/qemu/include/qemu/iov.h
new file mode 100644
index 000000000..68d25f29b
--- /dev/null
+++ b/contrib/qemu/include/qemu/iov.h
@@ -0,0 +1,115 @@
+/*
+ * Helpers for using (partial) iovecs.
+ *
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ * Amit Shah <amit.shah@redhat.com>
+ * Michael Tokarev <mjt@tls.msk.ru>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef IOV_H
+#define IOV_H
+
+#include "qemu-common.h"
+
+/**
+ * count and return data size, in bytes, of an iovec
+ * starting at `iov' of `iov_cnt' number of elements.
+ */
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt);
+
+/**
+ * Copy from single continuous buffer to scatter-gather vector of buffers
+ * (iovec) and back like memcpy() between two continuous memory regions.
+ * Data in single continuous buffer starting at address `buf' and
+ * `bytes' bytes long will be copied to/from an iovec `iov' with
+ * `iov_cnt' number of elements, starting at byte position `offset'
+ * within the iovec. If the iovec does not contain enough space,
+ * only part of data will be copied, up to the end of the iovec.
+ * Number of bytes actually copied will be returned, which is
+ * min(bytes, iov_size(iov)-offset)
+ * `Offset' must point to the inside of iovec.
+ * It is okay to use very large value for `bytes' since we're
+ * limited by the size of the iovec anyway, provided that the
+ * buffer pointed to by buf has enough space. One possible
+ * such "large" value is -1 (since size_t is unsigned),
+ * so specifying `-1' as `bytes' means 'up to the end of iovec'.
+ */
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, const void *buf, size_t bytes);
+size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, void *buf, size_t bytes);
+
+/**
+ * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements,
+ * starting at byte offset `offset', to value `fillc', repeating it
+ * `bytes' number of times. `Offset' must point to the inside of iovec.
+ * If `bytes' is larger than the space remaining in the iovec, only the
+ * portion from `offset' up to the end of it will be filled.
+ * The function returns the actual number of bytes processed, which is
+ * min(bytes, iov_size(iov) - offset).
+ * Again, it is okay to use large value for `bytes' to mean "up to the end".
+ */
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, int fillc, size_t bytes);
+
+/*
+ * Send/recv data from/to iovec buffers directly
+ *
+ * `offset' bytes in the beginning of iovec buffer are skipped and
+ * next `bytes' bytes are used, which must be within data of iovec.
+ *
+ * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true);
+ *
+ * is logically equivalent to
+ *
+ * char *buf = malloc(bytes);
+ * iov_to_buf(iov, iovcnt, offset, buf, bytes);
+ * r = send(sockfd, buf, bytes, 0);
+ * free(buf);
+ *
+ * For iov_send_recv() _whole_ area being sent or received
+ * should be within the iovec, not only beginning of it.
+ */
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes, bool do_send);
+/* iov_recv: shorthand for iov_send_recv() with do_send fixed to false. */
+#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \
+ iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false)
+/* iov_send: shorthand for iov_send_recv() with do_send fixed to true. */
+#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \
+ iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true)
+
+/**
+ * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements
+ * in file `fp', prefixing each line with `prefix' and processing not more
+ * than `limit' data bytes.
+ */
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+ FILE *fp, const char *prefix, size_t limit);
+
+/*
+ * Partial copy of vector from iov to dst_iov (data is not copied).
+ * dst_iov overlaps iov at a specified offset.
+ * size of dst_iov is at most bytes. dst vector count is returned.
+ */
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+ const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, size_t bytes);
+
+/*
+ * Remove a given number of bytes from the front or back of a vector.
+ * This may update iov and/or iov_cnt to exclude iovec elements that are
+ * no longer required.
+ *
+ * The number of bytes actually discarded is returned. This number may be
+ * smaller than requested if the vector is too small.
+ */
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+ size_t bytes);
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+ size_t bytes);
+
+#endif
diff --git a/contrib/qemu/include/qemu/main-loop.h b/contrib/qemu/include/qemu/main-loop.h
new file mode 100644
index 000000000..6f0200a7a
--- /dev/null
+++ b/contrib/qemu/include/qemu/main-loop.h
@@ -0,0 +1,311 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_MAIN_LOOP_H
+#define QEMU_MAIN_LOOP_H 1
+
+#include "block/aio.h"
+
+#define SIG_IPI SIGUSR1
+
+/**
+ * qemu_init_main_loop: Set up the process so that it can run the main loop.
+ *
+ * This includes setting up signal handlers. It should be called before
+ * any other threads are created. In addition, threads other than the
+ * main one should block signals that are trapped by the main loop.
+ * For simplicity, you can consider these signals to be safe: SIGUSR1,
+ * SIGUSR2, thread signals (SIGFPE, SIGILL, SIGSEGV, SIGBUS) and real-time
+ * signals if available. Remember that Windows in practice does not have
+ * signals, though.
+ *
+ * In the case of QEMU tools, this will also start/initialize timers.
+ */
+int qemu_init_main_loop(void);
+
+/**
+ * main_loop_wait: Run one iteration of the main loop.
+ *
+ * If @nonblocking is true, poll for events, otherwise suspend until
+ * one actually occurs. The main loop usually consists of a loop that
+ * repeatedly calls main_loop_wait(false).
+ *
+ * Main loop services include file descriptor callbacks, bottom halves
+ * and timers (defined in qemu-timer.h). Bottom halves are similar to timers
+ * that execute immediately, but have a lower overhead and scheduling them
+ * is wait-free, thread-safe and signal-safe.
+ *
+ * It is sometimes useful to put a whole program in a coroutine. In this
+ * case, the coroutine actually should be started from within the main loop,
+ * so that the main loop can run whenever the coroutine yields. To do this,
+ * you can use a bottom half to enter the coroutine as soon as the main loop
+ * starts:
+ *
+ * void enter_co_bh(void *opaque) {
+ * QEMUCoroutine *co = opaque;
+ * qemu_coroutine_enter(co, NULL);
+ * }
+ *
+ * ...
+ * QEMUCoroutine *co = qemu_coroutine_create(coroutine_entry);
+ * QEMUBH *start_bh = qemu_bh_new(enter_co_bh, co);
+ * qemu_bh_schedule(start_bh);
+ * while (...) {
+ * main_loop_wait(false);
+ * }
+ *
+ * (In the future we may provide a wrapper for this).
+ *
+ * @nonblocking: If true, only poll for events instead of blocking until one occurs.
+ */
+int main_loop_wait(int nonblocking);
+
+/**
+ * qemu_get_aio_context: Return the main loop's AioContext
+ */
+AioContext *qemu_get_aio_context(void);
+
+/**
+ * qemu_notify_event: Force processing of pending events.
+ *
+ * Similar to signaling a condition variable, qemu_notify_event forces
+ * main_loop_wait to look at pending events and exit. The caller of
+ * main_loop_wait will usually call it again very soon, so qemu_notify_event
+ * also has the side effect of recalculating the sets of file descriptors
+ * that the main loop waits for.
+ *
+ * Calling qemu_notify_event is rarely necessary, because main loop
+ * services (bottom halves and timers) call it themselves. One notable
+ * exception occurs when using qemu_set_fd_handler2 (see below).
+ */
+void qemu_notify_event(void);
+
+#ifdef _WIN32
+/* return TRUE if no sleep should be done afterwards */
+typedef int PollingFunc(void *opaque);
+
+/**
+ * qemu_add_polling_cb: Register a Windows-specific polling callback
+ *
+ * Currently, under Windows some events are polled rather than waited for.
+ * Polling callbacks do not ensure that @func is called timely, because
+ * the main loop might wait for an arbitrarily long time. If possible,
+ * you should instead create a separate thread that does a blocking poll
+ * and set a Win32 event object. The event can then be passed to
+ * qemu_add_wait_object.
+ *
+ * Polling callbacks really have nothing Windows specific in them, but
+ * as they are a hack and are currently not necessary under POSIX systems,
+ * they are only available when QEMU is running under Windows.
+ *
+ * @func: The function that does the polling, and returns 1 to force
+ * immediate completion of main_loop_wait.
+ * @opaque: A pointer-size value that is passed to @func.
+ */
+int qemu_add_polling_cb(PollingFunc *func, void *opaque);
+
+/**
+ * qemu_del_polling_cb: Unregister a Windows-specific polling callback
+ *
+ * This function removes a callback that was registered with
+ * qemu_add_polling_cb.
+ *
+ * @func: The function that was passed to qemu_add_polling_cb.
+ * @opaque: A pointer-size value that was passed to qemu_add_polling_cb.
+ */
+void qemu_del_polling_cb(PollingFunc *func, void *opaque);
+
+/* Wait objects handling */
+typedef void WaitObjectFunc(void *opaque);
+
+/**
+ * qemu_add_wait_object: Register a callback for a Windows handle
+ *
+ * Under Windows, the iohandler mechanism can only be used with sockets.
+ * QEMU must use the WaitForMultipleObjects API to wait on other handles.
+ * This function registers a #HANDLE with QEMU, so that it will be included
+ * in the main loop's calls to WaitForMultipleObjects. When the handle
+ * is in a signaled state, QEMU will call @func.
+ *
+ * @handle: The Windows handle to be observed.
+ * @func: A function to be called when @handle is in a signaled state.
+ * @opaque: A pointer-size value that is passed to @func.
+ */
+int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+
+/**
+ * qemu_del_wait_object: Unregister a callback for a Windows handle
+ *
+ * This function removes a callback that was registered with
+ * qemu_add_wait_object.
+ *
+ * @handle: The Windows handle that was passed to qemu_add_wait_object.
+ * @func: The function that was passed to qemu_add_wait_object.
+ * @opaque: A pointer-size value that was passed to qemu_add_wait_object.
+ */
+void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque);
+#endif
+
+/* async I/O support */
+
+typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
+typedef int IOCanReadHandler(void *opaque);
+
+/**
+ * qemu_set_fd_handler2: Register a file descriptor with the main loop
+ *
+ * This function tells the main loop to wake up whenever one of the
+ * following conditions is true:
+ *
+ * 1) if @fd_write is not %NULL, when the file descriptor is writable;
+ *
+ * 2) if @fd_read is not %NULL, when the file descriptor is readable.
+ *
+ * @fd_read_poll can be used to disable the @fd_read callback temporarily.
+ * This is useful to avoid calling qemu_set_fd_handler2 every time the
+ * client becomes interested in reading (or dually, stops being interested).
+ * A typical example is when @fd is a listening socket and you want to bound
+ * the number of active clients. Remember to call qemu_notify_event whenever
+ * the condition may change from %false to %true.
+ *
+ * The callbacks that are set up by qemu_set_fd_handler2 are level-triggered.
+ * If @fd_read does not read from @fd, or @fd_write does not write to @fd
+ * until its buffers are full, they will be called again on the next
+ * iteration.
+ *
+ * @fd: The file descriptor to be observed. Under Windows it must be
+ * a #SOCKET.
+ *
+ * @fd_read_poll: A function that returns 1 if the @fd_read callback
+ * should be fired. If the function returns 0, the main loop will not
+ * end its iteration even if @fd becomes readable.
+ *
+ * @fd_read: A level-triggered callback that is fired if @fd is readable
+ * at the beginning of a main loop iteration, or if it becomes readable
+ * during one.
+ *
+ * @fd_write: A level-triggered callback that is fired when @fd is writable
+ * at the beginning of a main loop iteration, or if it becomes writable
+ * during one.
+ *
+ * @opaque: A pointer-sized value that is passed to @fd_read_poll,
+ * @fd_read and @fd_write.
+ */
+int qemu_set_fd_handler2(int fd,
+ IOCanReadHandler *fd_read_poll,
+ IOHandler *fd_read,
+ IOHandler *fd_write,
+ void *opaque);
+
+/**
+ * qemu_set_fd_handler: Register a file descriptor with the main loop
+ *
+ * This function tells the main loop to wake up whenever one of the
+ * following conditions is true:
+ *
+ * 1) if @fd_write is not %NULL, when the file descriptor is writable;
+ *
+ * 2) if @fd_read is not %NULL, when the file descriptor is readable.
+ *
+ * The callbacks that are set up by qemu_set_fd_handler are level-triggered.
+ * If @fd_read does not read from @fd, or @fd_write does not write to @fd
+ * until its buffers are full, they will be called again on the next
+ * iteration.
+ *
+ * @fd: The file descriptor to be observed. Under Windows it must be
+ * a #SOCKET.
+ *
+ * @fd_read: A level-triggered callback that is fired if @fd is readable
+ * at the beginning of a main loop iteration, or if it becomes readable
+ * during one.
+ *
+ * @fd_write: A level-triggered callback that is fired when @fd is writable
+ * at the beginning of a main loop iteration, or if it becomes writable
+ * during one.
+ *
+ * @opaque: A pointer-sized value that is passed to @fd_read and @fd_write.
+ */
+int qemu_set_fd_handler(int fd,
+ IOHandler *fd_read,
+ IOHandler *fd_write,
+ void *opaque);
+
+#ifdef CONFIG_POSIX
+/**
+ * qemu_add_child_watch: Register a child process for reaping.
+ *
+ * Under POSIX systems, a parent process must read the exit status of
+ * its child processes using waitpid, or the operating system will not
+ * free some of the resources attached to that process.
+ *
+ * This function directs the QEMU main loop to observe a child process
+ * and call waitpid as soon as it exits; the watch is then removed
+ * automatically. It is useful whenever QEMU forks a child process
+ * but will find out about its termination by other means such as a
+ * "broken pipe".
+ *
+ * @pid: The pid that QEMU should observe.
+ */
+int qemu_add_child_watch(pid_t pid);
+#endif
+
+/**
+ * qemu_mutex_lock_iothread: Lock the main loop mutex.
+ *
+ * This function locks the main loop mutex. The mutex is taken by
+ * qemu_init_main_loop and always taken except while waiting on
+ * external events (such as with select). The mutex should be taken
+ * by threads other than the main loop thread when calling
+ * qemu_bh_new(), qemu_set_fd_handler() and basically all other
+ * functions documented in this file.
+ *
+ * NOTE: tools currently are single-threaded and qemu_mutex_lock_iothread
+ * is a no-op there.
+ */
+void qemu_mutex_lock_iothread(void);
+
+/**
+ * qemu_mutex_unlock_iothread: Unlock the main loop mutex.
+ *
+ * This function unlocks the main loop mutex. The mutex is taken by
+ * qemu_init_main_loop and always taken except while waiting on
+ * external events (such as with select). The mutex should be unlocked
+ * as soon as possible by threads other than the main loop thread,
+ * because it prevents the main loop from processing callbacks,
+ * including timers and bottom halves.
+ *
+ * NOTE: tools currently are single-threaded and qemu_mutex_unlock_iothread
+ * is a no-op there.
+ */
+void qemu_mutex_unlock_iothread(void);
+
+/* internal interfaces */
+
+void qemu_fd_register(int fd);
+void qemu_iohandler_fill(GArray *pollfds);
+void qemu_iohandler_poll(GArray *pollfds, int rc);
+
+QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
+void qemu_bh_schedule_idle(QEMUBH *bh);
+
+#endif
diff --git a/contrib/qemu/include/qemu/module.h b/contrib/qemu/include/qemu/module.h
new file mode 100644
index 000000000..c4ccd5716
--- /dev/null
+++ b/contrib/qemu/include/qemu/module.h
@@ -0,0 +1,40 @@
+/*
+ * QEMU Module Infrastructure
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_MODULE_H
+#define QEMU_MODULE_H
+
+/* This should not be used directly. Use block_init etc. instead. */
+/*
+ * module_init(function, type) expands to a static function named
+ * do_qemu_init_<function>, marked with the GCC `constructor' attribute,
+ * whose only action is to call register_module_init(function, type).
+ */
+#define module_init(function, type) \
+static void __attribute__((constructor)) do_qemu_init_ ## function(void) { \
+ register_module_init(function, type); \
+}
+
+/*
+ * Module categories accepted by register_module_init() and
+ * module_call_init().  MODULE_INIT_MAX is the last enumerator and so
+ * equals the number of categories before it.
+ */
+typedef enum {
+ MODULE_INIT_BLOCK,
+ MODULE_INIT_MACHINE,
+ MODULE_INIT_QAPI,
+ MODULE_INIT_QOM,
+ MODULE_INIT_MAX
+} module_init_type;
+
+/* Per-category wrappers around module_init(); each fixes the type argument. */
+#define block_init(function) module_init(function, MODULE_INIT_BLOCK)
+#define machine_init(function) module_init(function, MODULE_INIT_MACHINE)
+#define qapi_init(function) module_init(function, MODULE_INIT_QAPI)
+#define type_init(function) module_init(function, MODULE_INIT_QOM)
+
+void register_module_init(void (*fn)(void), module_init_type type);
+
+void module_call_init(module_init_type type);
+
+#endif
diff --git a/contrib/qemu/include/qemu/notify.h b/contrib/qemu/include/qemu/notify.h
new file mode 100644
index 000000000..a3d73e4bc
--- /dev/null
+++ b/contrib/qemu/include/qemu/notify.h
@@ -0,0 +1,72 @@
+/*
+ * Notifier lists
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef QEMU_NOTIFY_H
+#define QEMU_NOTIFY_H
+
+#include "qemu/queue.h"
+
+typedef struct Notifier Notifier;
+
+/* A single callback that can be linked into a NotifierList. */
+struct Notifier
+{
+ /* Callback; receives this notifier and a caller-supplied data pointer. */
+ void (*notify)(Notifier *notifier, void *data);
+ /* QLIST linkage for membership in a list of Notifier elements. */
+ QLIST_ENTRY(Notifier) node;
+};
+
+/* Head of a list of Notifier elements. */
+typedef struct NotifierList
+{
+ QLIST_HEAD(, Notifier) notifiers;
+} NotifierList;
+
+/* Static initializer for a NotifierList variable named @head. */
+#define NOTIFIER_LIST_INITIALIZER(head) \
+ { QLIST_HEAD_INITIALIZER((head).notifiers) }
+
+void notifier_list_init(NotifierList *list);
+
+void notifier_list_add(NotifierList *list, Notifier *notifier);
+
+void notifier_remove(Notifier *notifier);
+
+void notifier_list_notify(NotifierList *list, void *data);
+
+/* Same as Notifier but allows .notify() to return errors */
+typedef struct NotifierWithReturn NotifierWithReturn;
+
+struct NotifierWithReturn {
+ /**
+ * Return 0 on success (next notifier will be invoked), otherwise
+ * notifier_with_return_list_notify() will stop and return the value.
+ */
+ int (*notify)(NotifierWithReturn *notifier, void *data);
+ /* QLIST linkage for membership in a list of NotifierWithReturn elements. */
+ QLIST_ENTRY(NotifierWithReturn) node;
+};
+
+/* Head of a list of NotifierWithReturn elements. */
+typedef struct NotifierWithReturnList {
+ QLIST_HEAD(, NotifierWithReturn) notifiers;
+} NotifierWithReturnList;
+
+/* Static initializer for a NotifierWithReturnList variable named @head. */
+#define NOTIFIER_WITH_RETURN_LIST_INITIALIZER(head) \
+ { QLIST_HEAD_INITIALIZER((head).notifiers) }
+
+void notifier_with_return_list_init(NotifierWithReturnList *list);
+
+void notifier_with_return_list_add(NotifierWithReturnList *list,
+ NotifierWithReturn *notifier);
+
+void notifier_with_return_remove(NotifierWithReturn *notifier);
+
+int notifier_with_return_list_notify(NotifierWithReturnList *list,
+ void *data);
+
+#endif
diff --git a/contrib/qemu/include/qemu/option.h b/contrib/qemu/include/qemu/option.h
new file mode 100644
index 000000000..a83c70032
--- /dev/null
+++ b/contrib/qemu/include/qemu/option.h
@@ -0,0 +1,157 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OPTIONS_H
+#define QEMU_OPTIONS_H
+
+#include <stdint.h>
+#include "qemu/queue.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+
+/* Kind of value a QEMUOptionParameter describes. */
+enum QEMUOptionParType {
+ OPT_FLAG,
+ OPT_NUMBER,
+ OPT_SIZE,
+ OPT_STRING,
+};
+
+/* One named option: its kind, its current value and a help text. */
+typedef struct QEMUOptionParameter {
+ const char *name;
+ enum QEMUOptionParType type;
+ /*
+ * Value storage: an unsigned 64-bit number or a string pointer.
+ * NOTE(review): presumably `s' is used for OPT_STRING and `n' for the
+ * other kinds -- confirm against the option parser.
+ */
+ union {
+ uint64_t n;
+ char* s;
+ } value;
+ const char *help;
+} QEMUOptionParameter;
+
+
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim);
+const char *get_opt_value(char *buf, int buf_size, const char *p);
+int get_next_param_value(char *buf, int buf_size,
+ const char *tag, const char **pstr);
+int get_param_value(char *buf, int buf_size,
+ const char *tag, const char *str);
+
+
+/*
+ * The following functions take a parameter list as input. This is a pointer to
+ * the first element of a QEMUOptionParameter array which is terminated by an
+ * entry with entry->name == NULL.
+ */
+
+QEMUOptionParameter *get_option_parameter(QEMUOptionParameter *list,
+ const char *name);
+int set_option_parameter(QEMUOptionParameter *list, const char *name,
+ const char *value);
+int set_option_parameter_int(QEMUOptionParameter *list, const char *name,
+ uint64_t value);
+QEMUOptionParameter *append_option_parameters(QEMUOptionParameter *dest,
+ QEMUOptionParameter *list);
+QEMUOptionParameter *parse_option_parameters(const char *param,
+ QEMUOptionParameter *list, QEMUOptionParameter *dest);
+void free_option_parameters(QEMUOptionParameter *list);
+void print_option_parameters(QEMUOptionParameter *list);
+void print_option_help(QEMUOptionParameter *list);
+
+/* ------------------------------------------------------------------ */
+
+typedef struct QemuOpt QemuOpt;
+typedef struct QemuOpts QemuOpts;
+typedef struct QemuOptsList QemuOptsList;
+
+/* How the string value of a QemuOpt is interpreted. */
+enum QemuOptType {
+ QEMU_OPT_STRING = 0, /* no parsing (use string as-is) */
+ QEMU_OPT_BOOL, /* on/off */
+ QEMU_OPT_NUMBER, /* simple number */
+ QEMU_OPT_SIZE, /* size, accepts (K)ilo, (M)ega, (G)iga, (T)era postfix */
+};
+
+/* Description of one accepted option: its name, value type and help text. */
+typedef struct QemuOptDesc {
+ const char *name;
+ enum QemuOptType type;
+ const char *help;
+} QemuOptDesc;
+
+/* A named group of options together with the descriptions it accepts. */
+struct QemuOptsList {
+ const char *name;
+ const char *implied_opt_name;
+ bool merge_lists; /* Merge multiple uses of option into a single list? */
+ /* Tail queue of the QemuOpts instances belonging to this list. */
+ QTAILQ_HEAD(, QemuOpts) head;
+ /*
+ * Flexible array of option descriptions.
+ * NOTE(review): no length is stored here, so the array is presumably
+ * terminated by a sentinel entry -- confirm against the users.
+ */
+ QemuOptDesc desc[];
+};
+
+const char *qemu_opt_get(QemuOpts *opts, const char *name);
+/**
+ * qemu_opt_has_help_opt:
+ * @opts: options to search for a help request
+ *
+ * Check whether the options specified by @opts include one of the
+ * standard strings which indicate that the user is asking for a
+ * list of the valid values for a command line option (as defined
+ * by is_help_option()).
+ *
+ * Returns: true if @opts includes 'help' or equivalent.
+ */
+bool qemu_opt_has_help_opt(QemuOpts *opts);
+bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval);
+uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval);
+uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval);
+int qemu_opt_set(QemuOpts *opts, const char *name, const char *value);
+void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value,
+ Error **errp);
+int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val);
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val);
+typedef int (*qemu_opt_loopfunc)(const char *name, const char *value, void *opaque);
+int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
+ int abort_on_failure);
+
+QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id);
+QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
+ int fail_if_exists, Error **errp);
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list);
+void qemu_opts_reset(QemuOptsList *list);
+void qemu_opts_loc_restore(QemuOpts *opts);
+int qemu_opts_set(QemuOptsList *list, const char *id,
+ const char *name, const char *value);
+const char *qemu_opts_id(QemuOpts *opts);
+void qemu_opts_del(QemuOpts *opts);
+void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp);
+int qemu_opts_do_parse(QemuOpts *opts, const char *params, const char *firstname);
+QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params, int permit_abbrev);
+void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
+ int permit_abbrev);
+QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
+ Error **errp);
+QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict);
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp);
+
+typedef int (*qemu_opts_loopfunc)(QemuOpts *opts, void *opaque);
+int qemu_opts_print(QemuOpts *opts, void *dummy);
+int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, void *opaque,
+ int abort_on_failure);
+
+#endif
diff --git a/contrib/qemu/include/qemu/option_int.h b/contrib/qemu/include/qemu/option_int.h
new file mode 100644
index 000000000..8212fa4a4
--- /dev/null
+++ b/contrib/qemu/include/qemu/option_int.h
@@ -0,0 +1,54 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OPTIONS_INTERNAL_H
+#define QEMU_OPTIONS_INTERNAL_H
+
+#include "qemu/option.h"
+#include "qemu/error-report.h"
+
+struct QemuOpt { /* one element of the QemuOptHead tail queue declared in struct QemuOpts */
+ const char *name;
+ const char *str; /* presumably the option's value in string form -- confirm against the parser */
+
+ const QemuOptDesc *desc; /* presumably the descriptor this option was matched against -- confirm */
+ union {
+ bool boolean;
+ uint64_t uint;
+ } value; /* typed value slot; which member is valid is not determined by this header */
+
+ QemuOpts *opts; /* pointer to a QemuOpts; presumably the group holding this option -- confirm */
+ QTAILQ_ENTRY(QemuOpt) next; /* tail-queue linkage between QemuOpt elements */
+};
+
+struct QemuOpts { /* holds a tail queue ('head') of QemuOpt elements */
+ char *id;
+ QemuOptsList *list; /* presumably the QemuOptsList this group belongs to -- confirm */
+ Location loc;
+ QTAILQ_HEAD(QemuOptHead, QemuOpt) head; /* queue head type is named QemuOptHead */
+ QTAILQ_ENTRY(QemuOpts) next; /* tail-queue linkage between QemuOpts elements */
+};
+
+#endif
diff --git a/contrib/qemu/include/qemu/osdep.h b/contrib/qemu/include/qemu/osdep.h
new file mode 100644
index 000000000..26136f16e
--- /dev/null
+++ b/contrib/qemu/include/qemu/osdep.h
@@ -0,0 +1,218 @@
+#ifndef QEMU_OSDEP_H
+#define QEMU_OSDEP_H
+
+#include "config-host.h"
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#ifdef __OpenBSD__
+#include <sys/signal.h>
+#endif
+
+#ifndef _WIN32
+#include <sys/wait.h>
+#else
+#define WIFEXITED(x) 1
+#define WEXITSTATUS(x) (x)
+#endif
+
+#include <sys/time.h>
+
+#if defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10
+/* [u]int_fast*_t not in <sys/int_types.h> */
+typedef unsigned char uint_fast8_t;
+typedef unsigned int uint_fast16_t;
+typedef signed int int_fast16_t;
+#endif
+
+#ifndef glue
+#define xglue(x, y) x ## y
+#define glue(x, y) xglue(x, y)
+#define stringify(s) tostring(s)
+#define tostring(s) #s
+#endif
+
+#ifndef likely
+#if __GNUC__ < 3
+#define __builtin_expect(x, n) (x)
+#endif
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *) 0)->member) *__mptr = (ptr); \
+ (type *) ((char *) __mptr - offsetof(type, member));})
+#endif
+
+/* Convert from a base type to a parent type, with compile time checking: dev points at member 'field' and the result is the enclosing 'type' (via container_of). */
+#ifdef __GNUC__
+#define DO_UPCAST(type, field, dev) ( __extension__ ( { \
+ char __attribute__((unused)) offset_must_be_zero[ /* array size is valid only when offsetof(type, field) is 0 */ \
+ -offsetof(type, field)]; \
+ container_of(dev, type, field);}))
+#else
+#define DO_UPCAST(type, field, dev) container_of(dev, type, field) /* no offset check in this branch */
+#endif
+
+#define typeof_field(type, field) typeof(((type *)0)->field)
+#define type_check(t1,t2) ((t1*)0 - (t2*)0)
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef ROUND_UP
+#define ROUND_UP(n,d) (((n) + (d) - 1) & -(d))
+#endif
+
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef always_inline
+#if !((__GNUC__ < 3) || defined(__APPLE__))
+#ifdef __OPTIMIZE__
+#undef inline
+#define inline __attribute__ (( always_inline )) __inline__
+#endif
+#endif
+#else
+#undef inline
+#define inline always_inline
+#endif
+
+#define qemu_printf printf
+
+int qemu_daemon(int nochdir, int noclose);
+void *qemu_memalign(size_t alignment, size_t size);
+void *qemu_anon_ram_alloc(size_t size);
+void qemu_vfree(void *ptr);
+void qemu_anon_ram_free(void *ptr, size_t size);
+
+#define QEMU_MADV_INVALID -1
+
+#if defined(CONFIG_MADVISE)
+
+#define QEMU_MADV_WILLNEED MADV_WILLNEED
+#define QEMU_MADV_DONTNEED MADV_DONTNEED
+#ifdef MADV_DONTFORK
+#define QEMU_MADV_DONTFORK MADV_DONTFORK
+#else
+#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
+#endif
+#ifdef MADV_MERGEABLE
+#define QEMU_MADV_MERGEABLE MADV_MERGEABLE
+#else
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#endif
+#ifdef MADV_DONTDUMP
+#define QEMU_MADV_DONTDUMP MADV_DONTDUMP
+#else
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#endif
+#ifdef MADV_HUGEPAGE
+#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE
+#else
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+#endif
+
+#elif defined(CONFIG_POSIX_MADVISE)
+
+#define QEMU_MADV_WILLNEED POSIX_MADV_WILLNEED
+#define QEMU_MADV_DONTNEED POSIX_MADV_DONTNEED
+#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+
+#else /* no-op */
+
+#define QEMU_MADV_WILLNEED QEMU_MADV_INVALID
+#define QEMU_MADV_DONTNEED QEMU_MADV_INVALID
+#define QEMU_MADV_DONTFORK QEMU_MADV_INVALID
+#define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
+#define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+
+#endif
+
+int qemu_madvise(void *addr, size_t len, int advice);
+
+int qemu_open(const char *name, int flags, ...);
+int qemu_close(int fd);
+
+#if defined(__HAIKU__) && defined(__i386__)
+#define FMT_pid "%ld"
+#elif defined(WIN64)
+#define FMT_pid "%" PRId64
+#else
+#define FMT_pid "%d"
+#endif
+
+int qemu_create_pidfile(const char *filename);
+int qemu_get_thread_id(void);
+
+#ifndef CONFIG_IOVEC
+struct iovec {
+ void *iov_base;
+ size_t iov_len;
+};
+/*
+ * Use the same value as Linux for now.
+ */
+#define IOV_MAX 1024
+
+ssize_t readv(int fd, const struct iovec *iov, int iov_cnt);
+ssize_t writev(int fd, const struct iovec *iov, int iov_cnt);
+#else
+#include <sys/uio.h>
+#endif
+
+#ifdef _WIN32 /* local implementation for this branch; the #else branch aliases timersub */
+static inline void qemu_timersub(const struct timeval *val1, /* stores *val1 - *val2 into *res */
+ const struct timeval *val2,
+ struct timeval *res)
+{
+ res->tv_sec = val1->tv_sec - val2->tv_sec;
+ if (val1->tv_usec < val2->tv_usec) { /* microsecond part would go negative: borrow one second */
+ res->tv_sec--;
+ res->tv_usec = val1->tv_usec - val2->tv_usec + 1000 * 1000; /* add the borrowed 1,000,000 us */
+ } else {
+ res->tv_usec = val1->tv_usec - val2->tv_usec;
+ }
+}
+#else
+#define qemu_timersub timersub /* plain alias: same argument order as timersub(a, b, res) */
+#endif
+
+void qemu_set_cloexec(int fd);
+
+void qemu_set_version(const char *);
+const char *qemu_get_version(void);
+
+void fips_set_state(bool requested);
+bool fips_get_state(void);
+
+/* Return a dynamically allocated pathname denoting a file or directory that is
+ * appropriate for storing local state.
+ *
+ * @relative_pathname need not start with a directory separator; one will be
+ * added automatically.
+ *
+ * The caller is responsible for releasing the value returned with g_free()
+ * after use.
+ */
+char *qemu_get_local_state_pathname(const char *relative_pathname);
+
+#endif
diff --git a/contrib/qemu/include/qemu/queue.h b/contrib/qemu/include/qemu/queue.h
new file mode 100644
index 000000000..d433b9017
--- /dev/null
+++ b/contrib/qemu/include/qemu/queue.h
@@ -0,0 +1,414 @@
+/* $NetBSD: queue.h,v 1.52 2009/04/20 09:56:08 mschuett Exp $ */
+
+/*
+ * QEMU version: Copy from netbsd, removed debug code, removed some of
+ * the implementations. Left in singly-linked lists, lists, simple
+ * queues, and tail queues.
+ */
+
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ */
+
+#ifndef QEMU_SYS_QUEUE_H_
+#define QEMU_SYS_QUEUE_H_
+
+/*
+ * This file defines four types of data structures: singly-linked lists,
+ * lists, simple queues, and tail queues.
+ *
+ * A singly-linked list is headed by a single forward pointer. The
+ * elements are singly linked for minimum space and pointer manipulation
+ * overhead at the expense of O(n) removal for arbitrary elements. New
+ * elements can be added to the list after an existing element or at the
+ * head of the list. Elements being removed from the head of the list
+ * should use the explicit macro for this purpose for optimum
+ * efficiency. A singly-linked list may only be traversed in the forward
+ * direction. Singly-linked lists are ideal for applications with large
+ * datasets and few or no removals or for implementing a LIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A simple queue is headed by a pair of pointers, one the head of the
+ * list and the other to the tail of the list. The elements are singly
+ * linked to save space, so elements can only be removed from the
+ * head of the list. New elements can be added to the list after
+ * an existing element, at the head of the list, or at the end of the
+ * list. A simple queue may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+#include "qemu/atomic.h" /* for smp_wmb() */
+
+/*
+ * List definitions.
+ */
+#define QLIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define QLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define QLIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+#define QLIST_INIT(head) do { \
+ (head)->lh_first = NULL; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_AFTER(listelm, elm, field) do { \
+ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+ (listelm)->field.le_next->field.le_prev = \
+ &(elm)->field.le_next; \
+ (listelm)->field.le_next = (elm); \
+ (elm)->field.le_prev = &(listelm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ (elm)->field.le_next = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &(elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+ (head)->lh_first = (elm); \
+ (elm)->field.le_prev = &(head)->lh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_INSERT_HEAD_RCU(head, elm, field) do { \
+ (elm)->field.le_prev = &(head)->lh_first; \
+ (elm)->field.le_next = (head)->lh_first; \
+ smp_wmb(); /* fill elm before linking it */ \
+ if ((head)->lh_first != NULL) { \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next; \
+ } \
+ (head)->lh_first = (elm); \
+ smp_wmb(); \
+} while (/* CONSTCOND*/0)
+
+#define QLIST_REMOVE(elm, field) do { \
+ if ((elm)->field.le_next != NULL) \
+ (elm)->field.le_next->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = (elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define QLIST_FOREACH(var, head, field) \
+ for ((var) = ((head)->lh_first); \
+ (var); \
+ (var) = ((var)->field.le_next))
+
+#define QLIST_FOREACH_SAFE(var, head, field, next_var) \
+ for ((var) = ((head)->lh_first); \
+ (var) && ((next_var) = ((var)->field.le_next), 1); \
+ (var) = (next_var))
+
+/*
+ * List access methods.
+ */
+#define QLIST_EMPTY(head) ((head)->lh_first == NULL)
+#define QLIST_FIRST(head) ((head)->lh_first)
+#define QLIST_NEXT(elm, field) ((elm)->field.le_next)
+
+
+/*
+ * Singly-linked List definitions.
+ */
+#define QSLIST_HEAD(name, type) \
+struct name { \
+ struct type *slh_first; /* first element */ \
+}
+
+#define QSLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define QSLIST_ENTRY(type) \
+struct { \
+ struct type *sle_next; /* next element */ \
+}
+
+/*
+ * Singly-linked List functions.
+ */
+#define QSLIST_INIT(head) do { \
+ (head)->slh_first = NULL; \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_INSERT_AFTER(slistelm, elm, field) do { \
+ (elm)->field.sle_next = (slistelm)->field.sle_next; \
+ (slistelm)->field.sle_next = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.sle_next = (head)->slh_first; \
+ (head)->slh_first = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_REMOVE_HEAD(head, field) do { \
+ (head)->slh_first = (head)->slh_first->field.sle_next; \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_REMOVE_AFTER(slistelm, field) do { \
+ (slistelm)->field.sle_next = \
+ QSLIST_NEXT(QSLIST_NEXT((slistelm), field), field); \
+} while (/*CONSTCOND*/0)
+
+#define QSLIST_FOREACH(var, head, field) \
+ for((var) = (head)->slh_first; (var); (var) = (var)->field.sle_next)
+
+#define QSLIST_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = QSLIST_FIRST((head)); \
+ (var) && ((tvar) = QSLIST_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+/*
+ * Singly-linked List access methods.
+ */
+#define QSLIST_EMPTY(head) ((head)->slh_first == NULL)
+#define QSLIST_FIRST(head) ((head)->slh_first)
+#define QSLIST_NEXT(elm, field) ((elm)->field.sle_next)
+
+
+/*
+ * Simple queue definitions.
+ */
+#define QSIMPLEQ_HEAD(name, type) \
+struct name { \
+ struct type *sqh_first; /* first element */ \
+ struct type **sqh_last; /* addr of last next element */ \
+}
+
+#define QSIMPLEQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).sqh_first }
+
+#define QSIMPLEQ_ENTRY(type) \
+struct { \
+ struct type *sqe_next; /* next element */ \
+}
+
+/*
+ * Simple queue functions.
+ */
+#define QSIMPLEQ_INIT(head) do { \
+ (head)->sqh_first = NULL; \
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (head)->sqh_first = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.sqe_next = NULL; \
+ *(head)->sqh_last = (elm); \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (listelm)->field.sqe_next = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE_HEAD(head, field) do { \
+ if (((head)->sqh_first = (head)->sqh_first->field.sqe_next) == NULL)\
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_REMOVE(head, elm, type, field) do { \
+ if ((head)->sqh_first == (elm)) { \
+ QSIMPLEQ_REMOVE_HEAD((head), field); \
+ } else { \
+ struct type *curelm = (head)->sqh_first; \
+ while (curelm->field.sqe_next != (elm)) \
+ curelm = curelm->field.sqe_next; \
+ if ((curelm->field.sqe_next = \
+ curelm->field.sqe_next->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(curelm)->field.sqe_next; \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->sqh_first); \
+ (var); \
+ (var) = ((var)->field.sqe_next))
+
+#define QSIMPLEQ_FOREACH_SAFE(var, head, field, next) /* successor is saved in (next) before each body run, so the body may unlink (var) */ \
+ for ((var) = ((head)->sqh_first); \
+ (var) && (((next) = ((var)->field.sqe_next)), 1); \
+ (var) = (next))
+
+#define QSIMPLEQ_CONCAT(head1, head2) do { \
+ if (!QSIMPLEQ_EMPTY((head2))) { \
+ *(head1)->sqh_last = (head2)->sqh_first; \
+ (head1)->sqh_last = (head2)->sqh_last; \
+ QSIMPLEQ_INIT((head2)); \
+ } \
+} while (/*CONSTCOND*/0)
+
+#define QSIMPLEQ_LAST(head, type, field) \
+ (QSIMPLEQ_EMPTY((head)) ? \
+ NULL : \
+ ((struct type *)(void *) \
+ ((char *)((head)->sqh_last) - offsetof(struct type, field))))
+
+/*
+ * Simple queue access methods.
+ */
+#define QSIMPLEQ_EMPTY(head) ((head)->sqh_first == NULL)
+#define QSIMPLEQ_FIRST(head) ((head)->sqh_first)
+#define QSIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
+
+
+/*
+ * Tail queue definitions.
+ */
+#define Q_TAILQ_HEAD(name, type, qual) \
+struct name { \
+ qual type *tqh_first; /* first element */ \
+ qual type *qual *tqh_last; /* addr of last next element */ \
+}
+#define QTAILQ_HEAD(name, type) Q_TAILQ_HEAD(name, struct type,)
+
+#define QTAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first }
+
+#define Q_TAILQ_ENTRY(type, qual) \
+struct { \
+ qual type *tqe_next; /* next element */ \
+ qual type *qual *tqe_prev; /* address of previous next element */\
+}
+#define QTAILQ_ENTRY(type) Q_TAILQ_ENTRY(struct type,)
+
+/*
+ * Tail queue functions.
+ */
+#define QTAILQ_INIT(head) do { \
+ (head)->tqh_first = NULL; \
+ (head)->tqh_last = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+ (head)->tqh_first->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (head)->tqh_first = (elm); \
+ (elm)->field.tqe_prev = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.tqe_next = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (listelm)->field.tqe_next = (elm); \
+ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_REMOVE(head, elm, field) do { \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define QTAILQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->tqh_first); \
+ (var); \
+ (var) = ((var)->field.tqe_next))
+
+#define QTAILQ_FOREACH_SAFE(var, head, field, next_var) \
+ for ((var) = ((head)->tqh_first); \
+ (var) && ((next_var) = ((var)->field.tqe_next), 1); \
+ (var) = (next_var))
+
+#define QTAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last)); \
+ (var); \
+ (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last)))
+
+/*
+ * Tail queue access methods. QTAILQ_LAST and QTAILQ_PREV take the address stored in tqh_last / tqe_prev (the address of a tqe_next or tqh_first slot) and reinterpret it as a 'struct headname'; this relies on Q_TAILQ_HEAD and Q_TAILQ_ENTRY both being an element pointer followed by a pointer-to-pointer, and on headname being the tag given to QTAILQ_HEAD.
+ */
+#define QTAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#define QTAILQ_FIRST(head) ((head)->tqh_first)
+#define QTAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define QTAILQ_LAST(head, headname) /* evaluates to NULL when tqh_last still points at tqh_first, as QTAILQ_INIT leaves it */ \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define QTAILQ_PREV(elm, headname, field) /* element before elm; NULL when elm is first */ \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+#endif /* !QEMU_SYS_QUEUE_H_ */
diff --git a/contrib/qemu/include/qemu/sockets.h b/contrib/qemu/include/qemu/sockets.h
new file mode 100644
index 000000000..c5174d76a
--- /dev/null
+++ b/contrib/qemu/include/qemu/sockets.h
@@ -0,0 +1,83 @@
+/* headers to use the BSD sockets */
+#ifndef QEMU_SOCKET_H
+#define QEMU_SOCKET_H
+
+#ifdef _WIN32
+#include <windows.h>
+#include <winsock2.h>
+#include <ws2tcpip.h>
+
+#define socket_error() WSAGetLastError()
+
+int inet_aton(const char *cp, struct in_addr *ia);
+
+#else
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/un.h>
+
+#define socket_error() errno
+#define closesocket(s) close(s)
+
+#endif /* !_WIN32 */
+
+#include "qemu/option.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+
+extern QemuOptsList socket_optslist;
+
+/* misc helpers */
+int qemu_socket(int domain, int type, int protocol);
+int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen);
+int socket_set_cork(int fd, int v);
+int socket_set_nodelay(int fd);
+void qemu_set_block(int fd);
+void qemu_set_nonblock(int fd);
+int send_all(int fd, const void *buf, int len1);
+int recv_all(int fd, void *buf, int len1, bool single_read);
+
+/* callback function for nonblocking connect
+ * valid fd on success, negative error code on failure
+ */
+typedef void NonBlockingConnectHandler(int fd, void *opaque);
+
+InetSocketAddress *inet_parse(const char *str, Error **errp);
+int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp);
+int inet_listen(const char *str, char *ostr, int olen,
+ int socktype, int port_offset, Error **errp);
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque);
+int inet_connect(const char *str, Error **errp);
+int inet_nonblocking_connect(const char *str,
+ NonBlockingConnectHandler *callback,
+ void *opaque, Error **errp);
+
+int inet_dgram_opts(QemuOpts *opts, Error **errp);
+const char *inet_strfamily(int family);
+
+int unix_listen_opts(QemuOpts *opts, Error **errp);
+int unix_listen(const char *path, char *ostr, int olen, Error **errp);
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque);
+int unix_connect(const char *path, Error **errp);
+int unix_nonblocking_connect(const char *str,
+ NonBlockingConnectHandler *callback,
+ void *opaque, Error **errp);
+
+SocketAddress *socket_parse(const char *str, Error **errp);
+int socket_connect(SocketAddress *addr, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque);
+int socket_listen(SocketAddress *addr, Error **errp);
+int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp);
+
+/* Old, ipv4 only bits. Don't use for new code. */
+int parse_host_port(struct sockaddr_in *saddr, const char *str);
+int socket_init(void);
+
+#endif /* QEMU_SOCKET_H */
diff --git a/contrib/qemu/include/qemu/thread-posix.h b/contrib/qemu/include/qemu/thread-posix.h
new file mode 100644
index 000000000..0f30dccb5
--- /dev/null
+++ b/contrib/qemu/include/qemu/thread-posix.h
@@ -0,0 +1,28 @@
+#ifndef __QEMU_THREAD_POSIX_H
+#define __QEMU_THREAD_POSIX_H 1
+#include "pthread.h"
+#include <semaphore.h>
+
+struct QemuMutex {
+ pthread_mutex_t lock;
+};
+
+struct QemuCond {
+ pthread_cond_t cond;
+};
+
+struct QemuSemaphore {
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ int count;
+#else
+ sem_t sem;
+#endif
+};
+
+struct QemuThread {
+ pthread_t thread;
+};
+
+#endif
diff --git a/contrib/qemu/include/qemu/thread.h b/contrib/qemu/include/qemu/thread.h
new file mode 100644
index 000000000..c02404b9f
--- /dev/null
+++ b/contrib/qemu/include/qemu/thread.h
@@ -0,0 +1,56 @@
+#ifndef __QEMU_THREAD_H
+#define __QEMU_THREAD_H 1
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+typedef struct QemuMutex QemuMutex;
+typedef struct QemuCond QemuCond;
+typedef struct QemuSemaphore QemuSemaphore;
+typedef struct QemuThread QemuThread;
+
+#ifdef _WIN32
+#include "qemu/thread-win32.h"
+#else
+#include "qemu/thread-posix.h"
+#endif
+
+#define QEMU_THREAD_JOINABLE 0
+#define QEMU_THREAD_DETACHED 1
+
+void qemu_mutex_init(QemuMutex *mutex);
+void qemu_mutex_destroy(QemuMutex *mutex);
+void qemu_mutex_lock(QemuMutex *mutex);
+int qemu_mutex_trylock(QemuMutex *mutex);
+void qemu_mutex_unlock(QemuMutex *mutex);
+
+#define rcu_read_lock() do { } while (0)
+#define rcu_read_unlock() do { } while (0)
+
+void qemu_cond_init(QemuCond *cond);
+void qemu_cond_destroy(QemuCond *cond);
+
+/*
+ * IMPORTANT: The implementation does not guarantee that pthread_cond_signal
+ * and pthread_cond_broadcast can be called except while the same mutex is
+ * held as in the corresponding pthread_cond_wait calls!
+ */
+void qemu_cond_signal(QemuCond *cond);
+void qemu_cond_broadcast(QemuCond *cond);
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex);
+
+void qemu_sem_init(QemuSemaphore *sem, int init);
+void qemu_sem_post(QemuSemaphore *sem);
+void qemu_sem_wait(QemuSemaphore *sem);
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms);
+void qemu_sem_destroy(QemuSemaphore *sem);
+
+void qemu_thread_create(QemuThread *thread,
+ void *(*start_routine)(void *),
+ void *arg, int mode);
+void *qemu_thread_join(QemuThread *thread);
+void qemu_thread_get_self(QemuThread *thread);
+bool qemu_thread_is_self(QemuThread *thread);
+void qemu_thread_exit(void *retval);
+
+#endif
diff --git a/contrib/qemu/include/qemu/timer.h b/contrib/qemu/include/qemu/timer.h
new file mode 100644
index 000000000..9dd206ce7
--- /dev/null
+++ b/contrib/qemu/include/qemu/timer.h
@@ -0,0 +1,305 @@
+#ifndef QEMU_TIMER_H
+#define QEMU_TIMER_H
+
+#include "qemu-common.h"
+#include "qemu/main-loop.h"
+#include "qemu/notify.h"
+
+/* timers */
+
+#define SCALE_MS 1000000
+#define SCALE_US 1000
+#define SCALE_NS 1
+
+typedef struct QEMUClock QEMUClock;
+typedef void QEMUTimerCB(void *opaque);
+
+/* The real time clock should be used only for stuff which does not
+ change the virtual machine state, as it is run even if the virtual
+ machine is stopped. The real time clock has a frequency of 1000
+ Hz. */
+extern QEMUClock *rt_clock;
+
+/* The virtual clock is only run during the emulation. It is stopped
+ when the virtual machine is stopped. Virtual timers use a high
+ precision clock, usually cpu cycles (use ticks_per_sec). */
+extern QEMUClock *vm_clock;
+
+/* The host clock should be used for device models that emulate accurate
+ real time sources. It will continue to run when the virtual machine
+ is suspended, and it will reflect system time changes the host may
+ undergo (e.g. due to NTP). The host clock has the same precision as
+ the virtual clock. */
+extern QEMUClock *host_clock;
+
+int64_t qemu_get_clock_ns(QEMUClock *clock);
+int64_t qemu_clock_has_timers(QEMUClock *clock);
+int64_t qemu_clock_expired(QEMUClock *clock);
+int64_t qemu_clock_deadline(QEMUClock *clock);
+void qemu_clock_enable(QEMUClock *clock, bool enabled);
+void qemu_clock_warp(QEMUClock *clock);
+
+void qemu_register_clock_reset_notifier(QEMUClock *clock, Notifier *notifier);
+void qemu_unregister_clock_reset_notifier(QEMUClock *clock,
+ Notifier *notifier);
+
+QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
+ QEMUTimerCB *cb, void *opaque);
+void qemu_free_timer(QEMUTimer *ts);
+void qemu_del_timer(QEMUTimer *ts);
+void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time);
+void qemu_mod_timer(QEMUTimer *ts, int64_t expire_time);
+bool qemu_timer_pending(QEMUTimer *ts);
+bool qemu_timer_expired(QEMUTimer *timer_head, int64_t current_time);
+uint64_t qemu_timer_expire_time_ns(QEMUTimer *ts);
+
+void qemu_run_timers(QEMUClock *clock);
+void qemu_run_all_timers(void);
+void configure_alarms(char const *opt);
+void init_clocks(void);
+int init_timer_alarm(void);
+
+int64_t cpu_get_ticks(void);
+void cpu_enable_ticks(void);
+void cpu_disable_ticks(void);
+
+static inline QEMUTimer *qemu_new_timer_ns(QEMUClock *clock, QEMUTimerCB *cb, /* convenience wrapper around qemu_new_timer() */
+ void *opaque)
+{
+ return qemu_new_timer(clock, SCALE_NS, cb, opaque); /* scale argument is SCALE_NS (1) */
+}
+
+static inline QEMUTimer *qemu_new_timer_ms(QEMUClock *clock, QEMUTimerCB *cb, /* convenience wrapper around qemu_new_timer() */
+ void *opaque)
+{
+ return qemu_new_timer(clock, SCALE_MS, cb, opaque); /* scale argument is SCALE_MS (1000000) */
+}
+
+static inline int64_t qemu_get_clock_ms(QEMUClock *clock) /* clock reading converted from ns to ms */
+{
+ return qemu_get_clock_ns(clock) / SCALE_MS; /* SCALE_MS is 1000000 (ns per ms); integer division truncates */
+}
+
+static inline int64_t get_ticks_per_sec(void) /* constant tick rate used by the clock helpers in this header */
+{
+ return 1000000000LL; /* 10^9 ticks per second, i.e. one tick per nanosecond */
+}
+
+/* host time of day from gettimeofday(), in nanoseconds; this is wall-clock time, so it can jump if the system date changes and is not guaranteed monotonic */
+static inline int64_t get_clock_realtime(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL); /* return value is not checked */
+ return tv.tv_sec * 1000000000LL + (tv.tv_usec * 1000); /* seconds and microseconds both scaled to ns */
+}
+
+/* Warning: don't insert tracepoints into these functions, they are
+ also used by simpletrace backend and tracepoints would cause
+ an infinite recursion! */
+#ifdef _WIN32
+extern int64_t clock_freq;
+
+static inline int64_t get_clock(void)
+{
+ LARGE_INTEGER ti;
+ QueryPerformanceCounter(&ti);
+ return muldiv64(ti.QuadPart, get_ticks_per_sec(), clock_freq);
+}
+
+#else
+
+extern int use_rt_clock;
+
+static inline int64_t get_clock(void) /* host clock reading in nanoseconds */
+{
+#ifdef CLOCK_MONOTONIC
+ if (use_rt_clock) { /* use_rt_clock is defined elsewhere; presumably set when CLOCK_MONOTONIC works -- confirm */
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts); /* return value is not checked */
+ return ts.tv_sec * 1000000000LL + ts.tv_nsec; /* seconds scaled to ns plus the ns remainder */
+ } else
+#endif
+ { /* taken when use_rt_clock is 0, or unconditionally when CLOCK_MONOTONIC is not defined */
+ /* XXX: using gettimeofday leads to problems if the date
+ changes, so it should be avoided. */
+ return get_clock_realtime();
+ }
+}
+#endif
+
+void qemu_get_timer(QEMUFile *f, QEMUTimer *ts);
+void qemu_put_timer(QEMUFile *f, QEMUTimer *ts);
+
+/* icount */
+int64_t cpu_get_icount(void);
+int64_t cpu_get_clock(void);
+
+/*******************************************/
+/* host CPU ticks (if available) */
+
+#if defined(_ARCH_PPC)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t retval;
+#ifdef _ARCH_PPC64
+ /* This reads timebase in one 64bit go and includes Cell workaround from:
+ http://ozlabs.org/pipermail/linuxppc-dev/2006-October/027052.html
+ */
+ __asm__ __volatile__ ("mftb %0\n\t"
+ "cmpwi %0,0\n\t"
+ "beq- $-8"
+ : "=r" (retval));
+#else
+ /* http://ozlabs.org/pipermail/linuxppc-dev/1999-October/003889.html */
+ unsigned long junk;
+ __asm__ __volatile__ ("mfspr %1,269\n\t" /* mftbu */
+ "mfspr %L0,268\n\t" /* mftb */
+ "mfspr %0,269\n\t" /* mftbu */
+ "cmpw %0,%1\n\t"
+ "bne $-16"
+ : "=r" (retval), "=r" (junk));
+#endif
+ return retval;
+}
+
+#elif defined(__i386__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("rdtsc" : "=A" (val));
+ return val;
+}
+
+#elif defined(__x86_64__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ uint32_t low, high;
+ uint64_t val;
+ /* rdtsc delivers the counter in two 32-bit halves: low via "=a", high via "=d" */
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+ /* combine as unsigned: left-shifting a signed value into the sign bit is undefined */
+ val = ((uint64_t)high << 32) | low;
+ return (int64_t)val;
+}
+
+#elif defined(__hppa__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int val;
+ asm volatile ("mfctl %%cr16, %0" : "=r"(val));
+ return val;
+}
+
+#elif defined(__ia64)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile ("mov %0 = ar.itc" : "=r"(val) :: "memory");
+ return val;
+}
+
+#elif defined(__s390__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ int64_t val;
+ asm volatile("stck 0(%1)" : "=m" (val) : "a" (&val) : "cc");
+ return val;
+}
+
+#elif defined(__sparc__)
+
+static inline int64_t cpu_get_real_ticks (void)
+{
+#if defined(_LP64)
+ uint64_t rval;
+ asm volatile("rd %%tick,%0" : "=r"(rval));
+ return rval;
+#else
+ /* We need an %o or %g register for this. For recent enough gcc
+ there is an "h" constraint for that. Don't bother with that. */
+ union {
+ uint64_t i64;
+ struct {
+ uint32_t high;
+ uint32_t low;
+ } i32;
+ } rval;
+ asm volatile("rd %%tick,%%g1; srlx %%g1,32,%0; mov %%g1,%1"
+ : "=r"(rval.i32.high), "=r"(rval.i32.low) : : "g1");
+ return rval.i64;
+#endif
+}
+
+#elif defined(__mips__) && \
+ ((defined(__mips_isa_rev) && __mips_isa_rev >= 2) || defined(__linux__))
+/*
+ * binutils wants to use rdhwr only on mips32r2
+ * but as the Linux kernel emulates it, it's fine
+ * to use it.
+ *
+ */
+#define MIPS_RDHWR(rd, value) { \
+ __asm__ __volatile__ (".set push\n\t" \
+ ".set mips32r2\n\t" \
+ "rdhwr %0, "rd"\n\t" \
+ ".set pop" \
+ : "=r" (value)); \
+ }
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ /* On kernels >= 2.6.25 rdhwr <reg>, $2 and $3 are emulated */
+ uint32_t count;
+ static uint32_t cyc_per_count = 0; /* cached across calls; re-read only while it is still 0 */
+
+ if (!cyc_per_count) {
+ MIPS_RDHWR("$3", cyc_per_count); /* presumably the cycles-per-count resolution register -- confirm against the MIPS manual */
+ }
+
+ MIPS_RDHWR("$2", count); /* presumably the cycle counter register -- confirm */
+ return (int64_t)(count * cyc_per_count); /* NOTE(review): both operands are uint32_t, so the product wraps modulo 2^32 before the cast */
+}
+
+#elif defined(__alpha__)
+
+static inline int64_t cpu_get_real_ticks(void)
+{
+ uint64_t cc;
+ uint32_t cur, ofs;
+
+ asm volatile("rpcc %0" : "=r"(cc)); /* rpcc result lands in the 64-bit cc */
+ cur = cc; /* keeps the low 32 bits of cc */
+ ofs = cc >> 32; /* keeps the high 32 bits of cc */
+ return cur - ofs; /* 32-bit unsigned difference (wraps modulo 2^32), then widened to int64_t */
+}
+
+#else
+/* The host CPU doesn't have an easily accessible cycle counter.
+ Just return a monotonically increasing value. This will be
+ totally wrong, but hopefully better than nothing. */
+static inline int64_t cpu_get_real_ticks (void)
+{
+ static int64_t ticks = 0; /* NOTE(review): function-local static in a static inline, so each translation unit including this header keeps its own counter; the increment is not atomic */
+ return ticks++; /* returns the value before the increment, so the first call yields 0 */
+}
+#endif
+
+#ifdef CONFIG_PROFILER
+static inline int64_t profile_getclock(void)
+{
+ return cpu_get_real_ticks();
+}
+
+extern int64_t qemu_time, qemu_time_start;
+extern int64_t tlb_flush_time;
+extern int64_t dev_time;
+#endif
+
+#endif
diff --git a/contrib/qemu/include/qemu/typedefs.h b/contrib/qemu/include/qemu/typedefs.h
new file mode 100644
index 000000000..ac9f8d41a
--- /dev/null
+++ b/contrib/qemu/include/qemu/typedefs.h
@@ -0,0 +1,69 @@
+#ifndef QEMU_TYPEDEFS_H
+#define QEMU_TYPEDEFS_H
+
+/* A load of opaque types so that device init declarations don't have to
+ pull in all the real definitions. */
+typedef struct QEMUTimer QEMUTimer;
+typedef struct QEMUFile QEMUFile;
+typedef struct QEMUBH QEMUBH;
+
+struct Monitor;
+typedef struct Monitor Monitor;
+typedef struct MigrationParams MigrationParams;
+
+typedef struct Property Property;
+typedef struct PropertyInfo PropertyInfo;
+typedef struct CompatProperty CompatProperty;
+typedef struct DeviceState DeviceState;
+typedef struct BusState BusState;
+typedef struct BusClass BusClass;
+
+typedef struct AddressSpace AddressSpace;
+typedef struct MemoryRegion MemoryRegion;
+typedef struct MemoryRegionSection MemoryRegionSection;
+
+typedef struct MemoryMappingList MemoryMappingList;
+
+typedef struct NICInfo NICInfo;
+typedef struct HCIInfo HCIInfo;
+typedef struct AudioState AudioState;
+typedef struct BlockDriverState BlockDriverState;
+typedef struct DriveInfo DriveInfo;
+typedef struct DisplayState DisplayState;
+typedef struct DisplayChangeListener DisplayChangeListener;
+typedef struct DisplaySurface DisplaySurface;
+typedef struct PixelFormat PixelFormat;
+typedef struct QemuConsole QemuConsole;
+typedef struct CharDriverState CharDriverState;
+typedef struct MACAddr MACAddr;
+typedef struct NetClientState NetClientState;
+typedef struct i2c_bus i2c_bus;
+typedef struct ISABus ISABus;
+typedef struct ISADevice ISADevice;
+typedef struct SMBusDevice SMBusDevice;
+typedef struct PCIHostState PCIHostState;
+typedef struct PCIExpressHost PCIExpressHost;
+typedef struct PCIBus PCIBus;
+typedef struct PCIDevice PCIDevice;
+typedef struct PCIExpressDevice PCIExpressDevice;
+typedef struct PCIBridge PCIBridge;
+typedef struct PCIEAERMsg PCIEAERMsg;
+typedef struct PCIEAERLog PCIEAERLog;
+typedef struct PCIEAERErr PCIEAERErr;
+typedef struct PCIEPort PCIEPort;
+typedef struct PCIESlot PCIESlot;
+typedef struct MSIMessage MSIMessage;
+typedef struct SerialState SerialState;
+typedef struct PCMCIACardState PCMCIACardState;
+typedef struct MouseTransformInfo MouseTransformInfo;
+typedef struct uWireSlave uWireSlave;
+typedef struct I2SCodec I2SCodec;
+typedef struct SSIBus SSIBus;
+typedef struct EventNotifier EventNotifier;
+typedef struct VirtIODevice VirtIODevice;
+typedef struct QEMUSGList QEMUSGList;
+typedef struct SHPCDevice SHPCDevice;
+typedef struct FWCfgState FWCfgState;
+typedef struct PcGuestInfo PcGuestInfo;
+
+#endif /* QEMU_TYPEDEFS_H */
diff --git a/contrib/qemu/include/sysemu/os-posix.h b/contrib/qemu/include/sysemu/os-posix.h
new file mode 100644
index 000000000..25d0b2a73
--- /dev/null
+++ b/contrib/qemu/include/sysemu/os-posix.h
@@ -0,0 +1,52 @@
+/*
+ * posix specific declarations
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Jes Sorensen <Jes.Sorensen@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef QEMU_OS_POSIX_H
+#define QEMU_OS_POSIX_H
+
+void os_set_line_buffering(void);
+void os_set_proc_name(const char *s);
+void os_setup_signal_handling(void);
+void os_daemonize(void);
+void os_setup_post(void);
+int os_mlock(void);
+
+typedef struct timeval qemu_timeval;
+#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
+
+#ifndef CONFIG_UTIMENSAT
+#ifndef UTIME_NOW
+# define UTIME_NOW ((1l << 30) - 1l)
+#endif
+#ifndef UTIME_OMIT
+# define UTIME_OMIT ((1l << 30) - 2l)
+#endif
+#endif
+typedef struct timespec qemu_timespec;
+int qemu_utimens(const char *path, const qemu_timespec *times);
+
+bool is_daemonized(void);
+
+#endif
diff --git a/contrib/qemu/include/sysemu/sysemu.h b/contrib/qemu/include/sysemu/sysemu.h
new file mode 100644
index 000000000..3caeb66eb
--- /dev/null
+++ b/contrib/qemu/include/sysemu/sysemu.h
@@ -0,0 +1,200 @@
+#ifndef SYSEMU_H
+#define SYSEMU_H
+/* Misc. things related to the system emulator. */
+
+#include "qemu/typedefs.h"
+#include "qemu/option.h"
+#include "qemu/queue.h"
+#include "qemu/timer.h"
+#include "qapi-types.h"
+#include "qemu/notify.h"
+#include "qemu/main-loop.h"
+
+/* vl.c */
+
+extern const char *bios_name;
+
+extern const char *qemu_name;
+extern uint8_t qemu_uuid[];
+int qemu_uuid_parse(const char *str, uint8_t *uuid);
+#define UUID_FMT "%02hhx%02hhx%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
+
+bool runstate_check(RunState state);
+void runstate_set(RunState new_state);
+int runstate_is_running(void);
+bool runstate_needs_reset(void);
+typedef struct vm_change_state_entry VMChangeStateEntry;
+typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
+
+VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
+ void *opaque);
+void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
+void vm_state_notify(int running, RunState state);
+
+#define VMRESET_SILENT false
+#define VMRESET_REPORT true
+
+void vm_start(void);
+int vm_stop(RunState state);
+int vm_stop_force_state(RunState state);
+
+typedef enum WakeupReason {
+ QEMU_WAKEUP_REASON_OTHER = 0,
+ QEMU_WAKEUP_REASON_RTC,
+ QEMU_WAKEUP_REASON_PMTIMER,
+} WakeupReason;
+
+void qemu_system_reset_request(void);
+void qemu_system_suspend_request(void);
+void qemu_register_suspend_notifier(Notifier *notifier);
+void qemu_system_wakeup_request(WakeupReason reason);
+void qemu_system_wakeup_enable(WakeupReason reason, bool enabled);
+void qemu_register_wakeup_notifier(Notifier *notifier);
+void qemu_system_shutdown_request(void);
+void qemu_system_powerdown_request(void);
+void qemu_register_powerdown_notifier(Notifier *notifier);
+void qemu_system_debug_request(void);
+void qemu_system_vmstop_request(RunState reason);
+int qemu_shutdown_requested_get(void);
+int qemu_reset_requested_get(void);
+void qemu_system_killed(int signal, pid_t pid);
+void qemu_devices_reset(void);
+void qemu_system_reset(bool report);
+
+void qemu_add_exit_notifier(Notifier *notify);
+void qemu_remove_exit_notifier(Notifier *notify);
+
+void qemu_add_machine_init_done_notifier(Notifier *notify);
+
+void do_savevm(Monitor *mon, const QDict *qdict);
+int load_vmstate(const char *name);
+void do_delvm(Monitor *mon, const QDict *qdict);
+void do_info_snapshots(Monitor *mon, const QDict *qdict);
+
+void qemu_announce_self(void);
+
+bool qemu_savevm_state_blocked(Error **errp);
+void qemu_savevm_state_begin(QEMUFile *f,
+ const MigrationParams *params);
+int qemu_savevm_state_iterate(QEMUFile *f);
+void qemu_savevm_state_complete(QEMUFile *f);
+void qemu_savevm_state_cancel(void);
+uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size);
+int qemu_loadvm_state(QEMUFile *f);
+
+/* SLIRP */
+void do_info_slirp(Monitor *mon);
+
+typedef enum DisplayType
+{
+ DT_DEFAULT,
+ DT_CURSES,
+ DT_SDL,
+ DT_GTK,
+ DT_NOGRAPHIC,
+ DT_NONE,
+} DisplayType;
+
+extern int autostart;
+
+typedef enum {
+ VGA_NONE, VGA_STD, VGA_CIRRUS, VGA_VMWARE, VGA_XENFB, VGA_QXL,
+} VGAInterfaceType;
+
+extern int vga_interface_type;
+#define xenfb_enabled (vga_interface_type == VGA_XENFB)
+#define qxl_enabled (vga_interface_type == VGA_QXL)
+
+extern int graphic_width;
+extern int graphic_height;
+extern int graphic_depth;
+extern DisplayType display_type;
+extern const char *keyboard_layout;
+extern int win2k_install_hack;
+extern int alt_grab;
+extern int ctrl_grab;
+extern int smp_cpus;
+extern int max_cpus;
+extern int cursor_hide;
+extern int graphic_rotate;
+extern int no_quit;
+extern int no_shutdown;
+extern int semihosting_enabled;
+extern int old_param;
+extern int boot_menu;
+extern uint8_t *boot_splash_filedata;
+extern size_t boot_splash_filedata_size;
+extern uint8_t qemu_extra_params_fw[2];
+extern QEMUClock *rtc_clock;
+
+#define MAX_NODES 64
+#define MAX_CPUMASK_BITS 255
+extern int nb_numa_nodes;
+extern uint64_t node_mem[MAX_NODES];
+extern unsigned long *node_cpumask[MAX_NODES];
+
+#define MAX_OPTION_ROMS 16
+typedef struct QEMUOptionRom {
+ const char *name;
+ int32_t bootindex;
+} QEMUOptionRom;
+extern QEMUOptionRom option_rom[MAX_OPTION_ROMS];
+extern int nb_option_roms;
+
+#define MAX_PROM_ENVS 128
+extern const char *prom_envs[MAX_PROM_ENVS];
+extern unsigned int nb_prom_envs;
+
+/* pci-hotplug */
+void pci_device_hot_add(Monitor *mon, const QDict *qdict);
+int pci_drive_hot_add(Monitor *mon, const QDict *qdict, DriveInfo *dinfo);
+void do_pci_device_hot_remove(Monitor *mon, const QDict *qdict);
+
+/* generic hotplug */
+void drive_hot_add(Monitor *mon, const QDict *qdict);
+
+/* CPU hotplug */
+void qemu_register_cpu_added_notifier(Notifier *notifier);
+
+/* pcie aer error injection */
+void pcie_aer_inject_error_print(Monitor *mon, const QObject *data);
+int do_pcie_aer_inject_error(Monitor *mon,
+ const QDict *qdict, QObject **ret_data);
+
+/* serial ports */
+
+#define MAX_SERIAL_PORTS 4
+
+extern CharDriverState *serial_hds[MAX_SERIAL_PORTS];
+
+/* parallel ports */
+
+#define MAX_PARALLEL_PORTS 3
+
+extern CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
+
+void do_usb_add(Monitor *mon, const QDict *qdict);
+void do_usb_del(Monitor *mon, const QDict *qdict);
+void usb_info(Monitor *mon, const QDict *qdict);
+
+void rtc_change_mon_event(struct tm *tm);
+
+void add_boot_device_path(int32_t bootindex, DeviceState *dev,
+ const char *suffix);
+char *get_boot_devices_list(size_t *size);
+
+DeviceState *get_boot_device(uint32_t position);
+
+QemuOpts *qemu_get_machine_opts(void);
+
+bool usb_enabled(bool default_usb);
+
+extern QemuOptsList qemu_drive_opts;
+extern QemuOptsList qemu_chardev_opts;
+extern QemuOptsList qemu_device_opts;
+extern QemuOptsList qemu_netdev_opts;
+extern QemuOptsList qemu_net_opts;
+extern QemuOptsList qemu_global_opts;
+extern QemuOptsList qemu_mon_opts;
+
+#endif
diff --git a/contrib/qemu/include/trace.h b/contrib/qemu/include/trace.h
new file mode 100644
index 000000000..c15f49812
--- /dev/null
+++ b/contrib/qemu/include/trace.h
@@ -0,0 +1,6 @@
+#ifndef TRACE_H
+#define TRACE_H
+
+#include "trace/generated-tracers.h"
+
+#endif /* TRACE_H */
diff --git a/contrib/qemu/nop-symbols.c b/contrib/qemu/nop-symbols.c
new file mode 100644
index 000000000..ae93a3d3b
--- /dev/null
+++ b/contrib/qemu/nop-symbols.c
@@ -0,0 +1,12 @@
+int notifier_with_return_list_init () { return 0; }
+int notifier_with_return_list_notify () { return 0; }
+int notifier_with_return_list_add () { return 0; }
+int notifier_list_init () { return 0; }
+int notifier_list_notify () { return 0; }
+int notifier_list_add () { return 0; }
+int monitor_protocol_event () { return 0; }
+int block_job_cancel_sync () { return 0; }
+int block_job_iostatus_reset () { return 0; }
+int vm_stop () { return 0; }
+int qemu_get_aio_context () { return 0; }
+/* All definitions above are stubs that only return 0 -- presumably link-time placeholders; TODO confirm. */
diff --git a/contrib/qemu/qapi-types.h b/contrib/qemu/qapi-types.h
new file mode 100644
index 000000000..082b06d1c
--- /dev/null
+++ b/contrib/qemu/qapi-types.h
@@ -0,0 +1,2746 @@
+/* AUTOMATICALLY GENERATED, DO NOT MODIFY */
+
+/*
+ * schema-defined QAPI types
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QAPI_TYPES_H
+#define QAPI_TYPES_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#ifndef QAPI_TYPES_BUILTIN_STRUCT_DECL_H
+#define QAPI_TYPES_BUILTIN_STRUCT_DECL_H
+
+
+typedef struct strList
+{
+ union {
+ char * value;
+ uint64_t padding;
+ };
+ struct strList *next;
+} strList;
+
+typedef struct intList
+{
+ union {
+ int64_t value;
+ uint64_t padding;
+ };
+ struct intList *next;
+} intList;
+
+typedef struct numberList
+{
+ union {
+ double value;
+ uint64_t padding;
+ };
+ struct numberList *next;
+} numberList;
+
+typedef struct boolList
+{
+ union {
+ bool value;
+ uint64_t padding;
+ };
+ struct boolList *next;
+} boolList;
+
+typedef struct int8List
+{
+ union {
+ int8_t value;
+ uint64_t padding;
+ };
+ struct int8List *next;
+} int8List;
+
+typedef struct int16List
+{
+ union {
+ int16_t value;
+ uint64_t padding;
+ };
+ struct int16List *next;
+} int16List;
+
+typedef struct int32List
+{
+ union {
+ int32_t value;
+ uint64_t padding;
+ };
+ struct int32List *next;
+} int32List;
+
+typedef struct int64List
+{
+ union {
+ int64_t value;
+ uint64_t padding;
+ };
+ struct int64List *next;
+} int64List;
+
+typedef struct uint8List
+{
+ union {
+ uint8_t value;
+ uint64_t padding;
+ };
+ struct uint8List *next;
+} uint8List;
+
+typedef struct uint16List
+{
+ union {
+ uint16_t value;
+ uint64_t padding;
+ };
+ struct uint16List *next;
+} uint16List;
+
+typedef struct uint32List
+{
+ union {
+ uint32_t value;
+ uint64_t padding;
+ };
+ struct uint32List *next;
+} uint32List;
+
+typedef struct uint64List
+{
+ union {
+ uint64_t value;
+ uint64_t padding;
+ };
+ struct uint64List *next;
+} uint64List;
+
+#endif /* QAPI_TYPES_BUILTIN_STRUCT_DECL_H */
+
+
+extern const char *ErrorClass_lookup[];
+typedef enum ErrorClass
+{
+ ERROR_CLASS_GENERIC_ERROR = 0,
+ ERROR_CLASS_COMMAND_NOT_FOUND = 1,
+ ERROR_CLASS_DEVICE_ENCRYPTED = 2,
+ ERROR_CLASS_DEVICE_NOT_ACTIVE = 3,
+ ERROR_CLASS_DEVICE_NOT_FOUND = 4,
+ ERROR_CLASS_K_V_M_MISSING_CAP = 5,
+ ERROR_CLASS_MAX = 6,
+} ErrorClass;
+
+typedef struct ErrorClassList
+{
+ ErrorClass value;
+ struct ErrorClassList *next;
+} ErrorClassList;
+
+
+typedef struct NameInfo NameInfo;
+
+typedef struct NameInfoList
+{
+ union {
+ NameInfo *value;
+ uint64_t padding;
+ };
+ struct NameInfoList *next;
+} NameInfoList;
+
+
+typedef struct VersionInfo VersionInfo;
+
+typedef struct VersionInfoList
+{
+ union {
+ VersionInfo *value;
+ uint64_t padding;
+ };
+ struct VersionInfoList *next;
+} VersionInfoList;
+
+
+typedef struct KvmInfo KvmInfo;
+
+typedef struct KvmInfoList
+{
+ union {
+ KvmInfo *value;
+ uint64_t padding;
+ };
+ struct KvmInfoList *next;
+} KvmInfoList;
+
+extern const char *RunState_lookup[];
+typedef enum RunState
+{
+ RUN_STATE_DEBUG = 0,
+ RUN_STATE_INMIGRATE = 1,
+ RUN_STATE_INTERNAL_ERROR = 2,
+ RUN_STATE_IO_ERROR = 3,
+ RUN_STATE_PAUSED = 4,
+ RUN_STATE_POSTMIGRATE = 5,
+ RUN_STATE_PRELAUNCH = 6,
+ RUN_STATE_FINISH_MIGRATE = 7,
+ RUN_STATE_RESTORE_VM = 8,
+ RUN_STATE_RUNNING = 9,
+ RUN_STATE_SAVE_VM = 10,
+ RUN_STATE_SHUTDOWN = 11,
+ RUN_STATE_SUSPENDED = 12,
+ RUN_STATE_WATCHDOG = 13,
+ RUN_STATE_GUEST_PANICKED = 14,
+ RUN_STATE_MAX = 15,
+} RunState;
+
+typedef struct RunStateList
+{
+ RunState value;
+ struct RunStateList *next;
+} RunStateList;
+
+
+typedef struct SnapshotInfo SnapshotInfo;
+
+typedef struct SnapshotInfoList
+{
+ union {
+ SnapshotInfo *value;
+ uint64_t padding;
+ };
+ struct SnapshotInfoList *next;
+} SnapshotInfoList;
+
+
+typedef struct ImageInfo ImageInfo;
+
+typedef struct ImageInfoList
+{
+ union {
+ ImageInfo *value;
+ uint64_t padding;
+ };
+ struct ImageInfoList *next;
+} ImageInfoList;
+
+
+typedef struct ImageCheck ImageCheck;
+
+typedef struct ImageCheckList
+{
+ union {
+ ImageCheck *value;
+ uint64_t padding;
+ };
+ struct ImageCheckList *next;
+} ImageCheckList;
+
+
+typedef struct StatusInfo StatusInfo;
+
+typedef struct StatusInfoList
+{
+ union {
+ StatusInfo *value;
+ uint64_t padding;
+ };
+ struct StatusInfoList *next;
+} StatusInfoList;
+
+
+typedef struct UuidInfo UuidInfo;
+
+typedef struct UuidInfoList
+{
+ union {
+ UuidInfo *value;
+ uint64_t padding;
+ };
+ struct UuidInfoList *next;
+} UuidInfoList;
+
+
+typedef struct ChardevInfo ChardevInfo;
+
+typedef struct ChardevInfoList
+{
+ union {
+ ChardevInfo *value;
+ uint64_t padding;
+ };
+ struct ChardevInfoList *next;
+} ChardevInfoList;
+
+extern const char *DataFormat_lookup[];
+typedef enum DataFormat
+{
+ DATA_FORMAT_UTF8 = 0,
+ DATA_FORMAT_BASE64 = 1,
+ DATA_FORMAT_MAX = 2,
+} DataFormat;
+
+typedef struct DataFormatList
+{
+ DataFormat value;
+ struct DataFormatList *next;
+} DataFormatList;
+
+
+typedef struct CommandInfo CommandInfo;
+
+typedef struct CommandInfoList
+{
+ union {
+ CommandInfo *value;
+ uint64_t padding;
+ };
+ struct CommandInfoList *next;
+} CommandInfoList;
+
+
+typedef struct EventInfo EventInfo;
+
+typedef struct EventInfoList
+{
+ union {
+ EventInfo *value;
+ uint64_t padding;
+ };
+ struct EventInfoList *next;
+} EventInfoList;
+
+
+typedef struct MigrationStats MigrationStats;
+
+typedef struct MigrationStatsList
+{
+ union {
+ MigrationStats *value;
+ uint64_t padding;
+ };
+ struct MigrationStatsList *next;
+} MigrationStatsList;
+
+
+typedef struct XBZRLECacheStats XBZRLECacheStats;
+
+typedef struct XBZRLECacheStatsList
+{
+ union {
+ XBZRLECacheStats *value;
+ uint64_t padding;
+ };
+ struct XBZRLECacheStatsList *next;
+} XBZRLECacheStatsList;
+
+
+typedef struct MigrationInfo MigrationInfo;
+
+typedef struct MigrationInfoList
+{
+ union {
+ MigrationInfo *value;
+ uint64_t padding;
+ };
+ struct MigrationInfoList *next;
+} MigrationInfoList;
+
+extern const char *MigrationCapability_lookup[];
+typedef enum MigrationCapability
+{
+ MIGRATION_CAPABILITY_XBZRLE = 0,
+ MIGRATION_CAPABILITY_X_RDMA_PIN_ALL = 1,
+ MIGRATION_CAPABILITY_AUTO_CONVERGE = 2,
+ MIGRATION_CAPABILITY_MAX = 3,
+} MigrationCapability;
+
+typedef struct MigrationCapabilityList
+{
+ MigrationCapability value;
+ struct MigrationCapabilityList *next;
+} MigrationCapabilityList;
+
+
+typedef struct MigrationCapabilityStatus MigrationCapabilityStatus;
+
+typedef struct MigrationCapabilityStatusList
+{
+ union {
+ MigrationCapabilityStatus *value;
+ uint64_t padding;
+ };
+ struct MigrationCapabilityStatusList *next;
+} MigrationCapabilityStatusList;
+
+
+typedef struct MouseInfo MouseInfo;
+
+typedef struct MouseInfoList
+{
+ union {
+ MouseInfo *value;
+ uint64_t padding;
+ };
+ struct MouseInfoList *next;
+} MouseInfoList;
+
+
+typedef struct CpuInfo CpuInfo;
+
+typedef struct CpuInfoList
+{
+ union {
+ CpuInfo *value;
+ uint64_t padding;
+ };
+ struct CpuInfoList *next;
+} CpuInfoList;
+
+
+typedef struct BlockDeviceInfo BlockDeviceInfo;
+
+typedef struct BlockDeviceInfoList
+{
+ union {
+ BlockDeviceInfo *value;
+ uint64_t padding;
+ };
+ struct BlockDeviceInfoList *next;
+} BlockDeviceInfoList;
+
+extern const char *BlockDeviceIoStatus_lookup[];
+typedef enum BlockDeviceIoStatus
+{
+ BLOCK_DEVICE_IO_STATUS_OK = 0,
+ BLOCK_DEVICE_IO_STATUS_FAILED = 1,
+ BLOCK_DEVICE_IO_STATUS_NOSPACE = 2,
+ BLOCK_DEVICE_IO_STATUS_MAX = 3,
+} BlockDeviceIoStatus;
+
+typedef struct BlockDeviceIoStatusList
+{
+ BlockDeviceIoStatus value;
+ struct BlockDeviceIoStatusList *next;
+} BlockDeviceIoStatusList;
+
+
+typedef struct BlockDirtyInfo BlockDirtyInfo;
+
+typedef struct BlockDirtyInfoList
+{
+ union {
+ BlockDirtyInfo *value;
+ uint64_t padding;
+ };
+ struct BlockDirtyInfoList *next;
+} BlockDirtyInfoList;
+
+
+typedef struct BlockInfo BlockInfo;
+
+typedef struct BlockInfoList
+{
+ union {
+ BlockInfo *value;
+ uint64_t padding;
+ };
+ struct BlockInfoList *next;
+} BlockInfoList;
+
+
+typedef struct BlockDeviceStats BlockDeviceStats;
+
+typedef struct BlockDeviceStatsList
+{
+ union {
+ BlockDeviceStats *value;
+ uint64_t padding;
+ };
+ struct BlockDeviceStatsList *next;
+} BlockDeviceStatsList;
+
+
+typedef struct BlockStats BlockStats;
+
+typedef struct BlockStatsList
+{
+ union {
+ BlockStats *value;
+ uint64_t padding;
+ };
+ struct BlockStatsList *next;
+} BlockStatsList;
+
+
+typedef struct VncClientInfo VncClientInfo;
+
+typedef struct VncClientInfoList
+{
+ union {
+ VncClientInfo *value;
+ uint64_t padding;
+ };
+ struct VncClientInfoList *next;
+} VncClientInfoList;
+
+
+typedef struct VncInfo VncInfo;
+
+typedef struct VncInfoList
+{
+ union {
+ VncInfo *value;
+ uint64_t padding;
+ };
+ struct VncInfoList *next;
+} VncInfoList;
+
+
+typedef struct SpiceChannel SpiceChannel;
+
+typedef struct SpiceChannelList
+{
+ union {
+ SpiceChannel *value;
+ uint64_t padding;
+ };
+ struct SpiceChannelList *next;
+} SpiceChannelList;
+
+extern const char *SpiceQueryMouseMode_lookup[];
+typedef enum SpiceQueryMouseMode
+{
+ SPICE_QUERY_MOUSE_MODE_CLIENT = 0,
+ SPICE_QUERY_MOUSE_MODE_SERVER = 1,
+ SPICE_QUERY_MOUSE_MODE_UNKNOWN = 2,
+ SPICE_QUERY_MOUSE_MODE_MAX = 3,
+} SpiceQueryMouseMode;
+
+typedef struct SpiceQueryMouseModeList
+{
+ SpiceQueryMouseMode value;
+ struct SpiceQueryMouseModeList *next;
+} SpiceQueryMouseModeList;
+
+
+typedef struct SpiceInfo SpiceInfo;
+
+typedef struct SpiceInfoList
+{
+ union {
+ SpiceInfo *value;
+ uint64_t padding;
+ };
+ struct SpiceInfoList *next;
+} SpiceInfoList;
+
+
+typedef struct BalloonInfo BalloonInfo;
+
+typedef struct BalloonInfoList
+{
+ union {
+ BalloonInfo *value;
+ uint64_t padding;
+ };
+ struct BalloonInfoList *next;
+} BalloonInfoList;
+
+
+typedef struct PciMemoryRange PciMemoryRange;
+
+typedef struct PciMemoryRangeList
+{
+ union {
+ PciMemoryRange *value;
+ uint64_t padding;
+ };
+ struct PciMemoryRangeList *next;
+} PciMemoryRangeList;
+
+
+typedef struct PciMemoryRegion PciMemoryRegion;
+
+typedef struct PciMemoryRegionList
+{
+ union {
+ PciMemoryRegion *value;
+ uint64_t padding;
+ };
+ struct PciMemoryRegionList *next;
+} PciMemoryRegionList;
+
+
+typedef struct PciBridgeInfo PciBridgeInfo;
+
+typedef struct PciBridgeInfoList
+{
+ union {
+ PciBridgeInfo *value;
+ uint64_t padding;
+ };
+ struct PciBridgeInfoList *next;
+} PciBridgeInfoList;
+
+
+typedef struct PciDeviceInfo PciDeviceInfo;
+
+typedef struct PciDeviceInfoList
+{
+ union {
+ PciDeviceInfo *value;
+ uint64_t padding;
+ };
+ struct PciDeviceInfoList *next;
+} PciDeviceInfoList;
+
+
+typedef struct PciInfo PciInfo;
+
+typedef struct PciInfoList
+{
+ union {
+ PciInfo *value;
+ uint64_t padding;
+ };
+ struct PciInfoList *next;
+} PciInfoList;
+
+extern const char *BlockdevOnError_lookup[];
+typedef enum BlockdevOnError
+{
+ BLOCKDEV_ON_ERROR_REPORT = 0,
+ BLOCKDEV_ON_ERROR_IGNORE = 1,
+ BLOCKDEV_ON_ERROR_ENOSPC = 2,
+ BLOCKDEV_ON_ERROR_STOP = 3,
+ BLOCKDEV_ON_ERROR_MAX = 4,
+} BlockdevOnError;
+
+typedef struct BlockdevOnErrorList
+{
+ BlockdevOnError value;
+ struct BlockdevOnErrorList *next;
+} BlockdevOnErrorList;
+
+extern const char *MirrorSyncMode_lookup[];
+typedef enum MirrorSyncMode
+{
+ MIRROR_SYNC_MODE_TOP = 0,
+ MIRROR_SYNC_MODE_FULL = 1,
+ MIRROR_SYNC_MODE_NONE = 2,
+ MIRROR_SYNC_MODE_MAX = 3,
+} MirrorSyncMode;
+
+typedef struct MirrorSyncModeList
+{
+ MirrorSyncMode value;
+ struct MirrorSyncModeList *next;
+} MirrorSyncModeList;
+
+
+typedef struct BlockJobInfo BlockJobInfo;
+
+typedef struct BlockJobInfoList
+{
+ union {
+ BlockJobInfo *value;
+ uint64_t padding;
+ };
+ struct BlockJobInfoList *next;
+} BlockJobInfoList;
+
+extern const char *NewImageMode_lookup[];
+typedef enum NewImageMode
+{
+ NEW_IMAGE_MODE_EXISTING = 0,
+ NEW_IMAGE_MODE_ABSOLUTE_PATHS = 1,
+ NEW_IMAGE_MODE_MAX = 2,
+} NewImageMode;
+
+typedef struct NewImageModeList
+{
+ NewImageMode value;
+ struct NewImageModeList *next;
+} NewImageModeList;
+
+
+typedef struct BlockdevSnapshot BlockdevSnapshot;
+
+typedef struct BlockdevSnapshotList
+{
+ union {
+ BlockdevSnapshot *value;
+ uint64_t padding;
+ };
+ struct BlockdevSnapshotList *next;
+} BlockdevSnapshotList;
+
+
+typedef struct DriveBackup DriveBackup;
+
+typedef struct DriveBackupList
+{
+ union {
+ DriveBackup *value;
+ uint64_t padding;
+ };
+ struct DriveBackupList *next;
+} DriveBackupList;
+
+
+typedef struct Abort Abort;
+
+typedef struct AbortList
+{
+ union {
+ Abort *value;
+ uint64_t padding;
+ };
+ struct AbortList *next;
+} AbortList;
+
+
+typedef struct TransactionAction TransactionAction;
+
+typedef struct TransactionActionList
+{
+ union {
+ TransactionAction *value;
+ uint64_t padding;
+ };
+ struct TransactionActionList *next;
+} TransactionActionList;
+
+extern const char *TransactionActionKind_lookup[];
+typedef enum TransactionActionKind
+{
+ TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC = 0,
+ TRANSACTION_ACTION_KIND_DRIVE_BACKUP = 1,
+ TRANSACTION_ACTION_KIND_ABORT = 2,
+ TRANSACTION_ACTION_KIND_MAX = 3,
+} TransactionActionKind;
+
+
+typedef struct ObjectPropertyInfo ObjectPropertyInfo;
+
+typedef struct ObjectPropertyInfoList
+{
+ union {
+ ObjectPropertyInfo *value;
+ uint64_t padding;
+ };
+ struct ObjectPropertyInfoList *next;
+} ObjectPropertyInfoList;
+
+
+typedef struct ObjectTypeInfo ObjectTypeInfo;
+
+typedef struct ObjectTypeInfoList
+{
+ union {
+ ObjectTypeInfo *value;
+ uint64_t padding;
+ };
+ struct ObjectTypeInfoList *next;
+} ObjectTypeInfoList;
+
+
+typedef struct DevicePropertyInfo DevicePropertyInfo;
+
+typedef struct DevicePropertyInfoList
+{
+ union {
+ DevicePropertyInfo *value;
+ uint64_t padding;
+ };
+ struct DevicePropertyInfoList *next;
+} DevicePropertyInfoList;
+
+
+typedef struct NetdevNoneOptions NetdevNoneOptions;
+
+typedef struct NetdevNoneOptionsList
+{
+ union {
+ NetdevNoneOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevNoneOptionsList *next;
+} NetdevNoneOptionsList;
+
+
+typedef struct NetLegacyNicOptions NetLegacyNicOptions;
+
+typedef struct NetLegacyNicOptionsList
+{
+ union {
+ NetLegacyNicOptions *value;
+ uint64_t padding;
+ };
+ struct NetLegacyNicOptionsList *next;
+} NetLegacyNicOptionsList;
+
+
+typedef struct String String;
+
+typedef struct StringList
+{
+ union {
+ String *value;
+ uint64_t padding;
+ };
+ struct StringList *next;
+} StringList;
+
+
+typedef struct NetdevUserOptions NetdevUserOptions;
+
+typedef struct NetdevUserOptionsList
+{
+ union {
+ NetdevUserOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevUserOptionsList *next;
+} NetdevUserOptionsList;
+
+
+typedef struct NetdevTapOptions NetdevTapOptions;
+
+typedef struct NetdevTapOptionsList
+{
+ union {
+ NetdevTapOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevTapOptionsList *next;
+} NetdevTapOptionsList;
+
+
+typedef struct NetdevSocketOptions NetdevSocketOptions;
+
+typedef struct NetdevSocketOptionsList
+{
+ union {
+ NetdevSocketOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevSocketOptionsList *next;
+} NetdevSocketOptionsList;
+
+
+typedef struct NetdevVdeOptions NetdevVdeOptions;
+
+typedef struct NetdevVdeOptionsList
+{
+ union {
+ NetdevVdeOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevVdeOptionsList *next;
+} NetdevVdeOptionsList;
+
+
+typedef struct NetdevDumpOptions NetdevDumpOptions;
+
+typedef struct NetdevDumpOptionsList
+{
+ union {
+ NetdevDumpOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevDumpOptionsList *next;
+} NetdevDumpOptionsList;
+
+
+typedef struct NetdevBridgeOptions NetdevBridgeOptions;
+
+typedef struct NetdevBridgeOptionsList
+{
+ union {
+ NetdevBridgeOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevBridgeOptionsList *next;
+} NetdevBridgeOptionsList;
+
+
+typedef struct NetdevHubPortOptions NetdevHubPortOptions;
+
+typedef struct NetdevHubPortOptionsList
+{
+ union {
+ NetdevHubPortOptions *value;
+ uint64_t padding;
+ };
+ struct NetdevHubPortOptionsList *next;
+} NetdevHubPortOptionsList;
+
+
+typedef struct NetClientOptions NetClientOptions;
+
+typedef struct NetClientOptionsList
+{
+ union {
+ NetClientOptions *value;
+ uint64_t padding;
+ };
+ struct NetClientOptionsList *next;
+} NetClientOptionsList;
+
+extern const char *NetClientOptionsKind_lookup[];
+typedef enum NetClientOptionsKind
+{
+ NET_CLIENT_OPTIONS_KIND_NONE = 0,
+ NET_CLIENT_OPTIONS_KIND_NIC = 1,
+ NET_CLIENT_OPTIONS_KIND_USER = 2,
+ NET_CLIENT_OPTIONS_KIND_TAP = 3,
+ NET_CLIENT_OPTIONS_KIND_SOCKET = 4,
+ NET_CLIENT_OPTIONS_KIND_VDE = 5,
+ NET_CLIENT_OPTIONS_KIND_DUMP = 6,
+ NET_CLIENT_OPTIONS_KIND_BRIDGE = 7,
+ NET_CLIENT_OPTIONS_KIND_HUBPORT = 8,
+ NET_CLIENT_OPTIONS_KIND_MAX = 9,
+} NetClientOptionsKind;
+
+
+typedef struct NetLegacy NetLegacy;
+
+typedef struct NetLegacyList
+{
+ union {
+ NetLegacy *value;
+ uint64_t padding;
+ };
+ struct NetLegacyList *next;
+} NetLegacyList;
+
+
+typedef struct Netdev Netdev;
+
+typedef struct NetdevList
+{
+ union {
+ Netdev *value;
+ uint64_t padding;
+ };
+ struct NetdevList *next;
+} NetdevList;
+
+
+typedef struct InetSocketAddress InetSocketAddress;
+
+typedef struct InetSocketAddressList
+{
+ union {
+ InetSocketAddress *value;
+ uint64_t padding;
+ };
+ struct InetSocketAddressList *next;
+} InetSocketAddressList;
+
+
+typedef struct UnixSocketAddress UnixSocketAddress;
+
+typedef struct UnixSocketAddressList
+{
+ union {
+ UnixSocketAddress *value;
+ uint64_t padding;
+ };
+ struct UnixSocketAddressList *next;
+} UnixSocketAddressList;
+
+
+typedef struct SocketAddress SocketAddress;
+
+typedef struct SocketAddressList
+{
+ union {
+ SocketAddress *value;
+ uint64_t padding;
+ };
+ struct SocketAddressList *next;
+} SocketAddressList;
+
+extern const char *SocketAddressKind_lookup[];
+typedef enum SocketAddressKind
+{
+ SOCKET_ADDRESS_KIND_INET = 0,
+ SOCKET_ADDRESS_KIND_UNIX = 1,
+ SOCKET_ADDRESS_KIND_FD = 2,
+ SOCKET_ADDRESS_KIND_MAX = 3,
+} SocketAddressKind;
+
+
+typedef struct MachineInfo MachineInfo;
+
+typedef struct MachineInfoList
+{
+ union {
+ MachineInfo *value;
+ uint64_t padding;
+ };
+ struct MachineInfoList *next;
+} MachineInfoList;
+
+
+typedef struct CpuDefinitionInfo CpuDefinitionInfo;
+
+typedef struct CpuDefinitionInfoList
+{
+ union {
+ CpuDefinitionInfo *value;
+ uint64_t padding;
+ };
+ struct CpuDefinitionInfoList *next;
+} CpuDefinitionInfoList;
+
+
+typedef struct AddfdInfo AddfdInfo;
+
+typedef struct AddfdInfoList
+{
+ union {
+ AddfdInfo *value;
+ uint64_t padding;
+ };
+ struct AddfdInfoList *next;
+} AddfdInfoList;
+
+
+typedef struct FdsetFdInfo FdsetFdInfo;
+
+typedef struct FdsetFdInfoList
+{
+ union {
+ FdsetFdInfo *value;
+ uint64_t padding;
+ };
+ struct FdsetFdInfoList *next;
+} FdsetFdInfoList;
+
+
+typedef struct FdsetInfo FdsetInfo;
+
+typedef struct FdsetInfoList
+{
+ union {
+ FdsetInfo *value;
+ uint64_t padding;
+ };
+ struct FdsetInfoList *next;
+} FdsetInfoList;
+
+
+typedef struct TargetInfo TargetInfo;
+
+typedef struct TargetInfoList
+{
+ union {
+ TargetInfo *value;
+ uint64_t padding;
+ };
+ struct TargetInfoList *next;
+} TargetInfoList;
+
+extern const char *QKeyCode_lookup[];
+typedef enum QKeyCode
+{
+ Q_KEY_CODE_SHIFT = 0,
+ Q_KEY_CODE_SHIFT_R = 1,
+ Q_KEY_CODE_ALT = 2,
+ Q_KEY_CODE_ALT_R = 3,
+ Q_KEY_CODE_ALTGR = 4,
+ Q_KEY_CODE_ALTGR_R = 5,
+ Q_KEY_CODE_CTRL = 6,
+ Q_KEY_CODE_CTRL_R = 7,
+ Q_KEY_CODE_MENU = 8,
+ Q_KEY_CODE_ESC = 9,
+ Q_KEY_CODE_1 = 10,
+ Q_KEY_CODE_2 = 11,
+ Q_KEY_CODE_3 = 12,
+ Q_KEY_CODE_4 = 13,
+ Q_KEY_CODE_5 = 14,
+ Q_KEY_CODE_6 = 15,
+ Q_KEY_CODE_7 = 16,
+ Q_KEY_CODE_8 = 17,
+ Q_KEY_CODE_9 = 18,
+ Q_KEY_CODE_0 = 19,
+ Q_KEY_CODE_MINUS = 20,
+ Q_KEY_CODE_EQUAL = 21,
+ Q_KEY_CODE_BACKSPACE = 22,
+ Q_KEY_CODE_TAB = 23,
+ Q_KEY_CODE_Q = 24,
+ Q_KEY_CODE_W = 25,
+ Q_KEY_CODE_E = 26,
+ Q_KEY_CODE_R = 27,
+ Q_KEY_CODE_T = 28,
+ Q_KEY_CODE_Y = 29,
+ Q_KEY_CODE_U = 30,
+ Q_KEY_CODE_I = 31,
+ Q_KEY_CODE_O = 32,
+ Q_KEY_CODE_P = 33,
+ Q_KEY_CODE_BRACKET_LEFT = 34,
+ Q_KEY_CODE_BRACKET_RIGHT = 35,
+ Q_KEY_CODE_RET = 36,
+ Q_KEY_CODE_A = 37,
+ Q_KEY_CODE_S = 38,
+ Q_KEY_CODE_D = 39,
+ Q_KEY_CODE_F = 40,
+ Q_KEY_CODE_G = 41,
+ Q_KEY_CODE_H = 42,
+ Q_KEY_CODE_J = 43,
+ Q_KEY_CODE_K = 44,
+ Q_KEY_CODE_L = 45,
+ Q_KEY_CODE_SEMICOLON = 46,
+ Q_KEY_CODE_APOSTROPHE = 47,
+ Q_KEY_CODE_GRAVE_ACCENT = 48,
+ Q_KEY_CODE_BACKSLASH = 49,
+ Q_KEY_CODE_Z = 50,
+ Q_KEY_CODE_X = 51,
+ Q_KEY_CODE_C = 52,
+ Q_KEY_CODE_V = 53,
+ Q_KEY_CODE_B = 54,
+ Q_KEY_CODE_N = 55,
+ Q_KEY_CODE_M = 56,
+ Q_KEY_CODE_COMMA = 57,
+ Q_KEY_CODE_DOT = 58,
+ Q_KEY_CODE_SLASH = 59,
+ Q_KEY_CODE_ASTERISK = 60,
+ Q_KEY_CODE_SPC = 61,
+ Q_KEY_CODE_CAPS_LOCK = 62,
+ Q_KEY_CODE_F1 = 63,
+ Q_KEY_CODE_F2 = 64,
+ Q_KEY_CODE_F3 = 65,
+ Q_KEY_CODE_F4 = 66,
+ Q_KEY_CODE_F5 = 67,
+ Q_KEY_CODE_F6 = 68,
+ Q_KEY_CODE_F7 = 69,
+ Q_KEY_CODE_F8 = 70,
+ Q_KEY_CODE_F9 = 71,
+ Q_KEY_CODE_F10 = 72,
+ Q_KEY_CODE_NUM_LOCK = 73,
+ Q_KEY_CODE_SCROLL_LOCK = 74,
+ Q_KEY_CODE_KP_DIVIDE = 75,
+ Q_KEY_CODE_KP_MULTIPLY = 76,
+ Q_KEY_CODE_KP_SUBTRACT = 77,
+ Q_KEY_CODE_KP_ADD = 78,
+ Q_KEY_CODE_KP_ENTER = 79,
+ Q_KEY_CODE_KP_DECIMAL = 80,
+ Q_KEY_CODE_SYSRQ = 81,
+ Q_KEY_CODE_KP_0 = 82,
+ Q_KEY_CODE_KP_1 = 83,
+ Q_KEY_CODE_KP_2 = 84,
+ Q_KEY_CODE_KP_3 = 85,
+ Q_KEY_CODE_KP_4 = 86,
+ Q_KEY_CODE_KP_5 = 87,
+ Q_KEY_CODE_KP_6 = 88,
+ Q_KEY_CODE_KP_7 = 89,
+ Q_KEY_CODE_KP_8 = 90,
+ Q_KEY_CODE_KP_9 = 91,
+ Q_KEY_CODE_LESS = 92,
+ Q_KEY_CODE_F11 = 93,
+ Q_KEY_CODE_F12 = 94,
+ Q_KEY_CODE_PRINT = 95,
+ Q_KEY_CODE_HOME = 96,
+ Q_KEY_CODE_PGUP = 97,
+ Q_KEY_CODE_PGDN = 98,
+ Q_KEY_CODE_END = 99,
+ Q_KEY_CODE_LEFT = 100,
+ Q_KEY_CODE_UP = 101,
+ Q_KEY_CODE_DOWN = 102,
+ Q_KEY_CODE_RIGHT = 103,
+ Q_KEY_CODE_INSERT = 104,
+ Q_KEY_CODE_DELETE = 105,
+ Q_KEY_CODE_STOP = 106,
+ Q_KEY_CODE_AGAIN = 107,
+ Q_KEY_CODE_PROPS = 108,
+ Q_KEY_CODE_UNDO = 109,
+ Q_KEY_CODE_FRONT = 110,
+ Q_KEY_CODE_COPY = 111,
+ Q_KEY_CODE_OPEN = 112,
+ Q_KEY_CODE_PASTE = 113,
+ Q_KEY_CODE_FIND = 114,
+ Q_KEY_CODE_CUT = 115,
+ Q_KEY_CODE_LF = 116,
+ Q_KEY_CODE_HELP = 117,
+ Q_KEY_CODE_META_L = 118,
+ Q_KEY_CODE_META_R = 119,
+ Q_KEY_CODE_COMPOSE = 120,
+ Q_KEY_CODE_MAX = 121,
+} QKeyCode;
+
+typedef struct QKeyCodeList /* singly linked list node holding one QKeyCode */
+{
+ QKeyCode value; /* stored by value; unlike the pointer lists there is no value/padding union here */
+ struct QKeyCodeList *next; /* following node; NOTE(review): presumably NULL at the tail - confirm */
+} QKeyCodeList;
+
+
+typedef struct KeyValue KeyValue;
+
+typedef struct KeyValueList
+{
+ union {
+ KeyValue *value;
+ uint64_t padding;
+ };
+ struct KeyValueList *next;
+} KeyValueList;
+
+extern const char *KeyValueKind_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by KeyValueKind - confirm */
+typedef enum KeyValueKind /* type of the `kind` member of struct KeyValue */
+{
+ KEY_VALUE_KIND_NUMBER = 0, /* name matches the `number` member of KeyValue's union */
+ KEY_VALUE_KIND_QCODE = 1, /* name matches the `qcode` member of KeyValue's union */
+ KEY_VALUE_KIND_MAX = 2, /* equals the number of enumerators above */
+} KeyValueKind;
+
+
+typedef struct ChardevFile ChardevFile;
+
+typedef struct ChardevFileList
+{
+ union {
+ ChardevFile *value;
+ uint64_t padding;
+ };
+ struct ChardevFileList *next;
+} ChardevFileList;
+
+
+typedef struct ChardevHostdev ChardevHostdev;
+
+typedef struct ChardevHostdevList
+{
+ union {
+ ChardevHostdev *value;
+ uint64_t padding;
+ };
+ struct ChardevHostdevList *next;
+} ChardevHostdevList;
+
+
+typedef struct ChardevSocket ChardevSocket;
+
+typedef struct ChardevSocketList
+{
+ union {
+ ChardevSocket *value;
+ uint64_t padding;
+ };
+ struct ChardevSocketList *next;
+} ChardevSocketList;
+
+
+typedef struct ChardevUdp ChardevUdp;
+
+typedef struct ChardevUdpList
+{
+ union {
+ ChardevUdp *value;
+ uint64_t padding;
+ };
+ struct ChardevUdpList *next;
+} ChardevUdpList;
+
+
+typedef struct ChardevMux ChardevMux;
+
+typedef struct ChardevMuxList
+{
+ union {
+ ChardevMux *value;
+ uint64_t padding;
+ };
+ struct ChardevMuxList *next;
+} ChardevMuxList;
+
+
+typedef struct ChardevStdio ChardevStdio;
+
+typedef struct ChardevStdioList
+{
+ union {
+ ChardevStdio *value;
+ uint64_t padding;
+ };
+ struct ChardevStdioList *next;
+} ChardevStdioList;
+
+
+typedef struct ChardevSpiceChannel ChardevSpiceChannel;
+
+typedef struct ChardevSpiceChannelList
+{
+ union {
+ ChardevSpiceChannel *value;
+ uint64_t padding;
+ };
+ struct ChardevSpiceChannelList *next;
+} ChardevSpiceChannelList;
+
+
+typedef struct ChardevSpicePort ChardevSpicePort;
+
+typedef struct ChardevSpicePortList
+{
+ union {
+ ChardevSpicePort *value;
+ uint64_t padding;
+ };
+ struct ChardevSpicePortList *next;
+} ChardevSpicePortList;
+
+
+typedef struct ChardevVC ChardevVC;
+
+typedef struct ChardevVCList
+{
+ union {
+ ChardevVC *value;
+ uint64_t padding;
+ };
+ struct ChardevVCList *next;
+} ChardevVCList;
+
+
+typedef struct ChardevMemory ChardevMemory;
+
+typedef struct ChardevMemoryList
+{
+ union {
+ ChardevMemory *value;
+ uint64_t padding;
+ };
+ struct ChardevMemoryList *next;
+} ChardevMemoryList;
+
+
+typedef struct ChardevDummy ChardevDummy;
+
+typedef struct ChardevDummyList
+{
+ union {
+ ChardevDummy *value;
+ uint64_t padding;
+ };
+ struct ChardevDummyList *next;
+} ChardevDummyList;
+
+
+typedef struct ChardevBackend ChardevBackend;
+
+typedef struct ChardevBackendList
+{
+ union {
+ ChardevBackend *value;
+ uint64_t padding;
+ };
+ struct ChardevBackendList *next;
+} ChardevBackendList;
+
+extern const char *ChardevBackendKind_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by ChardevBackendKind - confirm */
+typedef enum ChardevBackendKind /* type of the `kind` member of struct ChardevBackend; enumerators follow the order of that struct's union members */
+{
+ CHARDEV_BACKEND_KIND_FILE = 0,
+ CHARDEV_BACKEND_KIND_SERIAL = 1,
+ CHARDEV_BACKEND_KIND_PARALLEL = 2,
+ CHARDEV_BACKEND_KIND_PIPE = 3,
+ CHARDEV_BACKEND_KIND_SOCKET = 4,
+ CHARDEV_BACKEND_KIND_UDP = 5,
+ CHARDEV_BACKEND_KIND_PTY = 6,
+ CHARDEV_BACKEND_KIND_NULL = 7,
+ CHARDEV_BACKEND_KIND_MUX = 8,
+ CHARDEV_BACKEND_KIND_MSMOUSE = 9,
+ CHARDEV_BACKEND_KIND_BRAILLE = 10,
+ CHARDEV_BACKEND_KIND_STDIO = 11,
+ CHARDEV_BACKEND_KIND_CONSOLE = 12,
+ CHARDEV_BACKEND_KIND_SPICEVMC = 13,
+ CHARDEV_BACKEND_KIND_SPICEPORT = 14,
+ CHARDEV_BACKEND_KIND_VC = 15,
+ CHARDEV_BACKEND_KIND_MEMORY = 16,
+ CHARDEV_BACKEND_KIND_MAX = 17, /* equals the number of enumerators above */
+} ChardevBackendKind;
+
+
+typedef struct ChardevReturn ChardevReturn;
+
+typedef struct ChardevReturnList
+{
+ union {
+ ChardevReturn *value;
+ uint64_t padding;
+ };
+ struct ChardevReturnList *next;
+} ChardevReturnList;
+
+extern const char *TpmModel_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by TpmModel - confirm */
+typedef enum TpmModel /* type of the `model` member of struct TPMInfo */
+{
+ TPM_MODEL_TPM_TIS = 0, /* the only model value defined here */
+ TPM_MODEL_MAX = 1, /* equals the number of enumerators above */
+} TpmModel;
+
+typedef struct TpmModelList
+{
+ TpmModel value;
+ struct TpmModelList *next;
+} TpmModelList;
+
+extern const char *TpmType_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by TpmType - confirm */
+typedef enum TpmType /* distinct type from TpmTypeOptionsKind even though both list only "passthrough" */
+{
+ TPM_TYPE_PASSTHROUGH = 0, /* the only type value defined here */
+ TPM_TYPE_MAX = 1, /* equals the number of enumerators above */
+} TpmType;
+
+typedef struct TpmTypeList
+{
+ TpmType value;
+ struct TpmTypeList *next;
+} TpmTypeList;
+
+
+typedef struct TPMPassthroughOptions TPMPassthroughOptions;
+
+typedef struct TPMPassthroughOptionsList
+{
+ union {
+ TPMPassthroughOptions *value;
+ uint64_t padding;
+ };
+ struct TPMPassthroughOptionsList *next;
+} TPMPassthroughOptionsList;
+
+
+typedef struct TpmTypeOptions TpmTypeOptions;
+
+typedef struct TpmTypeOptionsList
+{
+ union {
+ TpmTypeOptions *value;
+ uint64_t padding;
+ };
+ struct TpmTypeOptionsList *next;
+} TpmTypeOptionsList;
+
+extern const char *TpmTypeOptionsKind_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by TpmTypeOptionsKind - confirm */
+typedef enum TpmTypeOptionsKind /* type of the `kind` member of struct TpmTypeOptions */
+{
+ TPM_TYPE_OPTIONS_KIND_PASSTHROUGH = 0, /* name matches the `passthrough` member of TpmTypeOptions' union */
+ TPM_TYPE_OPTIONS_KIND_MAX = 1, /* equals the number of enumerators above */
+} TpmTypeOptionsKind;
+
+
+typedef struct TPMInfo TPMInfo;
+
+typedef struct TPMInfoList
+{
+ union {
+ TPMInfo *value;
+ uint64_t padding;
+ };
+ struct TPMInfoList *next;
+} TPMInfoList;
+
+
+typedef struct AcpiTableOptions AcpiTableOptions;
+
+typedef struct AcpiTableOptionsList
+{
+ union {
+ AcpiTableOptions *value;
+ uint64_t padding;
+ };
+ struct AcpiTableOptionsList *next;
+} AcpiTableOptionsList;
+
+extern const char *CommandLineParameterType_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by CommandLineParameterType - confirm */
+typedef enum CommandLineParameterType /* type of the `type` member of struct CommandLineParameterInfo */
+{
+ COMMAND_LINE_PARAMETER_TYPE_STRING = 0,
+ COMMAND_LINE_PARAMETER_TYPE_BOOLEAN = 1,
+ COMMAND_LINE_PARAMETER_TYPE_NUMBER = 2,
+ COMMAND_LINE_PARAMETER_TYPE_SIZE = 3,
+ COMMAND_LINE_PARAMETER_TYPE_MAX = 4, /* equals the number of enumerators above */
+} CommandLineParameterType;
+
+typedef struct CommandLineParameterTypeList
+{
+ CommandLineParameterType value;
+ struct CommandLineParameterTypeList *next;
+} CommandLineParameterTypeList;
+
+
+typedef struct CommandLineParameterInfo CommandLineParameterInfo;
+
+typedef struct CommandLineParameterInfoList
+{
+ union {
+ CommandLineParameterInfo *value;
+ uint64_t padding;
+ };
+ struct CommandLineParameterInfoList *next;
+} CommandLineParameterInfoList;
+
+
+typedef struct CommandLineOptionInfo CommandLineOptionInfo;
+
+typedef struct CommandLineOptionInfoList
+{
+ union {
+ CommandLineOptionInfo *value;
+ uint64_t padding;
+ };
+ struct CommandLineOptionInfoList *next;
+} CommandLineOptionInfoList;
+
+extern const char *X86CPURegister32_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by X86CPURegister32 - confirm */
+typedef enum X86CPURegister32 /* NOTE(review): the X86_C_P_U_ prefix looks like the generator split "CPU" letter by letter - confirm before renaming anything */
+{
+ X86_C_P_U_REGISTER32_EAX = 0,
+ X86_C_P_U_REGISTER32_EBX = 1, /* NOTE(review): EBX before ECX differs from the x86 instruction-encoding order (EAX, ECX, EDX, EBX), so do not use these values as hardware register numbers */
+ X86_C_P_U_REGISTER32_ECX = 2,
+ X86_C_P_U_REGISTER32_EDX = 3,
+ X86_C_P_U_REGISTER32_ESP = 4,
+ X86_C_P_U_REGISTER32_EBP = 5,
+ X86_C_P_U_REGISTER32_ESI = 6,
+ X86_C_P_U_REGISTER32_EDI = 7,
+ X86_C_P_U_REGISTER32_MAX = 8, /* equals the number of enumerators above */
+} X86CPURegister32;
+
+typedef struct X86CPURegister32List
+{
+ X86CPURegister32 value;
+ struct X86CPURegister32List *next;
+} X86CPURegister32List;
+
+
+typedef struct X86CPUFeatureWordInfo X86CPUFeatureWordInfo;
+
+typedef struct X86CPUFeatureWordInfoList
+{
+ union {
+ X86CPUFeatureWordInfo *value;
+ uint64_t padding;
+ };
+ struct X86CPUFeatureWordInfoList *next;
+} X86CPUFeatureWordInfoList;
+
+extern const char *RxState_lookup[]; /* string table defined elsewhere; NOTE(review): presumably indexed by RxState - confirm */
+typedef enum RxState /* type of the `multicast` and `unicast` members of struct RxFilterInfo */
+{
+ RX_STATE_NORMAL = 0,
+ RX_STATE_NONE = 1,
+ RX_STATE_ALL = 2,
+ RX_STATE_MAX = 3, /* equals the number of enumerators above */
+} RxState;
+
+typedef struct RxStateList
+{
+ RxState value;
+ struct RxStateList *next;
+} RxStateList;
+
+
+typedef struct RxFilterInfo RxFilterInfo;
+
+typedef struct RxFilterInfoList
+{
+ union {
+ RxFilterInfo *value;
+ uint64_t padding;
+ };
+ struct RxFilterInfoList *next;
+} RxFilterInfoList;
+
+#ifndef QAPI_TYPES_BUILTIN_CLEANUP_DECL_H /* dedicated guard for the built-in list prototypes only; NOTE(review): presumably so several generated headers can each carry this section - confirm */
+#define QAPI_TYPES_BUILTIN_CLEANUP_DECL_H
+
+void qapi_free_strList(strList * obj); /* the list types used below are declared outside this section */
+void qapi_free_intList(intList * obj);
+void qapi_free_numberList(numberList * obj);
+void qapi_free_boolList(boolList * obj);
+void qapi_free_int8List(int8List * obj);
+void qapi_free_int16List(int16List * obj);
+void qapi_free_int32List(int32List * obj);
+void qapi_free_int64List(int64List * obj);
+void qapi_free_uint8List(uint8List * obj);
+void qapi_free_uint16List(uint16List * obj);
+void qapi_free_uint32List(uint32List * obj);
+void qapi_free_uint64List(uint64List * obj);
+
+#endif /* QAPI_TYPES_BUILTIN_CLEANUP_DECL_H */
+
+
+void qapi_free_ErrorClassList(ErrorClassList * obj);
+
+struct NameInfo /* record with a single, flagged string member */
+{
+ bool has_name; /* NOTE(review): presumably true only when `name` is populated - confirm against the QAPI generator */
+ char * name; /* string member paired with has_name */
+};
+
+void qapi_free_NameInfoList(NameInfoList * obj);
+void qapi_free_NameInfo(NameInfo * obj);
+
+struct VersionInfo /* three-part numeric version plus a package string */
+{
+ struct /* untagged nested struct; reachable only through the `qemu` member */
+ {
+ int64_t major;
+ int64_t minor;
+ int64_t micro;
+ } qemu; /* embedded by value, not a pointer */
+ char * package; /* no has_package flag accompanies this member */
+};
+
+void qapi_free_VersionInfoList(VersionInfoList * obj);
+void qapi_free_VersionInfo(VersionInfo * obj);
+
+struct KvmInfo
+{
+ bool enabled;
+ bool present;
+};
+
+void qapi_free_KvmInfoList(KvmInfoList * obj);
+void qapi_free_KvmInfo(KvmInfo * obj);
+
+void qapi_free_RunStateList(RunStateList * obj);
+
+struct SnapshotInfo
+{
+ char * id;
+ char * name;
+ int64_t vm_state_size;
+ int64_t date_sec;
+ int64_t date_nsec;
+ int64_t vm_clock_sec;
+ int64_t vm_clock_nsec;
+};
+
+void qapi_free_SnapshotInfoList(SnapshotInfoList * obj);
+void qapi_free_SnapshotInfo(SnapshotInfo * obj);
+
+struct ImageInfo /* description of one disk image; can point to another ImageInfo through backing_image */
+{
+ char * filename; /* unflagged member */
+ char * format; /* unflagged member */
+ bool has_dirty_flag; /* NOTE(review): each has_X below presumably says whether the following member X is populated - confirm against the QAPI generator */
+ bool dirty_flag;
+ bool has_actual_size;
+ int64_t actual_size;
+ int64_t virtual_size; /* unflagged, unlike actual_size above */
+ bool has_cluster_size;
+ int64_t cluster_size;
+ bool has_encrypted;
+ bool encrypted;
+ bool has_backing_filename;
+ char * backing_filename;
+ bool has_full_backing_filename;
+ char * full_backing_filename;
+ bool has_backing_filename_format;
+ char * backing_filename_format;
+ bool has_snapshots;
+ SnapshotInfoList * snapshots; /* head of a SnapshotInfo list */
+ bool has_backing_image;
+ ImageInfo * backing_image; /* same type as the enclosing struct, so images can form a chain */
+};
+
+void qapi_free_ImageInfoList(ImageInfoList * obj);
+void qapi_free_ImageInfo(ImageInfo * obj);
+
+struct ImageCheck
+{
+ char * filename;
+ char * format;
+ int64_t check_errors;
+ bool has_image_end_offset;
+ int64_t image_end_offset;
+ bool has_corruptions;
+ int64_t corruptions;
+ bool has_leaks;
+ int64_t leaks;
+ bool has_corruptions_fixed;
+ int64_t corruptions_fixed;
+ bool has_leaks_fixed;
+ int64_t leaks_fixed;
+ bool has_total_clusters;
+ int64_t total_clusters;
+ bool has_allocated_clusters;
+ int64_t allocated_clusters;
+ bool has_fragmented_clusters;
+ int64_t fragmented_clusters;
+ bool has_compressed_clusters;
+ int64_t compressed_clusters;
+};
+
+void qapi_free_ImageCheckList(ImageCheckList * obj);
+void qapi_free_ImageCheck(ImageCheck * obj);
+
+struct StatusInfo
+{
+ bool running;
+ bool singlestep;
+ RunState status;
+};
+
+void qapi_free_StatusInfoList(StatusInfoList * obj);
+void qapi_free_StatusInfo(StatusInfo * obj);
+
+struct UuidInfo
+{
+ char * UUID;
+};
+
+void qapi_free_UuidInfoList(UuidInfoList * obj);
+void qapi_free_UuidInfo(UuidInfo * obj);
+
+struct ChardevInfo
+{
+ char * label;
+ char * filename;
+};
+
+void qapi_free_ChardevInfoList(ChardevInfoList * obj);
+void qapi_free_ChardevInfo(ChardevInfo * obj);
+
+void qapi_free_DataFormatList(DataFormatList * obj);
+
+struct CommandInfo
+{
+ char * name;
+};
+
+void qapi_free_CommandInfoList(CommandInfoList * obj);
+void qapi_free_CommandInfo(CommandInfo * obj);
+
+struct EventInfo
+{
+ char * name;
+};
+
+void qapi_free_EventInfoList(EventInfoList * obj);
+void qapi_free_EventInfo(EventInfo * obj);
+
+struct MigrationStats
+{
+ int64_t transferred;
+ int64_t remaining;
+ int64_t total;
+ int64_t duplicate;
+ int64_t skipped;
+ int64_t normal;
+ int64_t normal_bytes;
+ int64_t dirty_pages_rate;
+ double mbps;
+};
+
+void qapi_free_MigrationStatsList(MigrationStatsList * obj);
+void qapi_free_MigrationStats(MigrationStats * obj);
+
+struct XBZRLECacheStats
+{
+ int64_t cache_size;
+ int64_t bytes;
+ int64_t pages;
+ int64_t cache_miss;
+ int64_t overflow;
+};
+
+void qapi_free_XBZRLECacheStatsList(XBZRLECacheStatsList * obj);
+void qapi_free_XBZRLECacheStats(XBZRLECacheStats * obj);
+
+struct MigrationInfo /* migration report; every member here is paired with a has_ flag */
+{
+ bool has_status; /* NOTE(review): each has_X presumably says whether member X is populated - confirm against the QAPI generator */
+ char * status;
+ bool has_ram;
+ MigrationStats * ram; /* pointer to a separately defined MigrationStats */
+ bool has_disk;
+ MigrationStats * disk; /* same type as `ram` */
+ bool has_xbzrle_cache;
+ XBZRLECacheStats * xbzrle_cache;
+ bool has_total_time;
+ int64_t total_time; /* NOTE(review): unit is not stated in this header - confirm before use */
+ bool has_expected_downtime;
+ int64_t expected_downtime; /* NOTE(review): unit is not stated in this header - confirm before use */
+ bool has_downtime;
+ int64_t downtime; /* NOTE(review): unit is not stated in this header - confirm before use */
+};
+
+void qapi_free_MigrationInfoList(MigrationInfoList * obj);
+void qapi_free_MigrationInfo(MigrationInfo * obj);
+
+void qapi_free_MigrationCapabilityList(MigrationCapabilityList * obj);
+
+struct MigrationCapabilityStatus
+{
+ MigrationCapability capability;
+ bool state;
+};
+
+void qapi_free_MigrationCapabilityStatusList(MigrationCapabilityStatusList * obj);
+void qapi_free_MigrationCapabilityStatus(MigrationCapabilityStatus * obj);
+
+struct MouseInfo
+{
+ char * name;
+ int64_t index;
+ bool current;
+ bool absolute;
+};
+
+void qapi_free_MouseInfoList(MouseInfoList * obj);
+void qapi_free_MouseInfo(MouseInfo * obj);
+
+struct CpuInfo /* per-CPU record with four separately flagged program-counter style members */
+{
+ int64_t CPU; /* upper-case member name is part of the interface; do not normalise it */
+ bool current;
+ bool halted;
+ bool has_pc; /* NOTE(review): the pc/nip/npc/PC members are presumably architecture specific, with at most some of them set - confirm */
+ int64_t pc;
+ bool has_nip;
+ int64_t nip;
+ bool has_npc;
+ int64_t npc;
+ bool has_PC; /* differs from has_pc only by letter case */
+ int64_t PC; /* differs from `pc` only by letter case; easy to mix up */
+ int64_t thread_id;
+};
+
+void qapi_free_CpuInfoList(CpuInfoList * obj);
+void qapi_free_CpuInfo(CpuInfo * obj);
+
+struct BlockDeviceInfo
+{
+ char * file;
+ bool ro;
+ char * drv;
+ bool has_backing_file;
+ char * backing_file;
+ int64_t backing_file_depth;
+ bool encrypted;
+ bool encryption_key_missing;
+ int64_t bps;
+ int64_t bps_rd;
+ int64_t bps_wr;
+ int64_t iops;
+ int64_t iops_rd;
+ int64_t iops_wr;
+ ImageInfo * image;
+};
+
+void qapi_free_BlockDeviceInfoList(BlockDeviceInfoList * obj);
+void qapi_free_BlockDeviceInfo(BlockDeviceInfo * obj);
+
+void qapi_free_BlockDeviceIoStatusList(BlockDeviceIoStatusList * obj);
+
+struct BlockDirtyInfo
+{
+ int64_t count;
+ int64_t granularity;
+};
+
+void qapi_free_BlockDirtyInfoList(BlockDirtyInfoList * obj);
+void qapi_free_BlockDirtyInfo(BlockDirtyInfo * obj);
+
+struct BlockInfo /* one block device entry, with flagged details about inserted media, tray, I/O status and dirty state */
+{
+ char * device;
+ char * type;
+ bool removable;
+ bool locked;
+ bool has_inserted; /* NOTE(review): each has_X presumably says whether member X is populated - confirm against the QAPI generator */
+ BlockDeviceInfo * inserted; /* pointer to a separately defined BlockDeviceInfo */
+ bool has_tray_open;
+ bool tray_open;
+ bool has_io_status;
+ BlockDeviceIoStatus io_status; /* enum stored by value */
+ bool has_dirty;
+ BlockDirtyInfo * dirty; /* pointer to a separately defined BlockDirtyInfo */
+};
+
+void qapi_free_BlockInfoList(BlockInfoList * obj);
+void qapi_free_BlockInfo(BlockInfo * obj);
+
+struct BlockDeviceStats
+{
+ int64_t rd_bytes;
+ int64_t wr_bytes;
+ int64_t rd_operations;
+ int64_t wr_operations;
+ int64_t flush_operations;
+ int64_t flush_total_time_ns;
+ int64_t wr_total_time_ns;
+ int64_t rd_total_time_ns;
+ int64_t wr_highest_offset;
+};
+
+void qapi_free_BlockDeviceStatsList(BlockDeviceStatsList * obj);
+void qapi_free_BlockDeviceStats(BlockDeviceStats * obj);
+
+struct BlockStats /* statistics record that can link to another BlockStats through `parent` */
+{
+ bool has_device; /* NOTE(review): presumably says whether `device` is populated - confirm against the QAPI generator */
+ char * device;
+ BlockDeviceStats * stats; /* unflagged pointer to the counters */
+ bool has_parent; /* NOTE(review): presumably says whether `parent` is populated - confirm */
+ BlockStats * parent; /* same type as the enclosing struct */
+};
+
+void qapi_free_BlockStatsList(BlockStatsList * obj);
+void qapi_free_BlockStats(BlockStats * obj);
+
+struct VncClientInfo
+{
+ char * host;
+ char * family;
+ char * service;
+ bool has_x509_dname;
+ char * x509_dname;
+ bool has_sasl_username;
+ char * sasl_username;
+};
+
+void qapi_free_VncClientInfoList(VncClientInfoList * obj);
+void qapi_free_VncClientInfo(VncClientInfo * obj);
+
+struct VncInfo
+{
+ bool enabled;
+ bool has_host;
+ char * host;
+ bool has_family;
+ char * family;
+ bool has_service;
+ char * service;
+ bool has_auth;
+ char * auth;
+ bool has_clients;
+ VncClientInfoList * clients;
+};
+
+void qapi_free_VncInfoList(VncInfoList * obj);
+void qapi_free_VncInfo(VncInfo * obj);
+
+struct SpiceChannel
+{
+ char * host;
+ char * family;
+ char * port;
+ int64_t connection_id;
+ int64_t channel_type;
+ int64_t channel_id;
+ bool tls;
+};
+
+void qapi_free_SpiceChannelList(SpiceChannelList * obj);
+void qapi_free_SpiceChannel(SpiceChannel * obj);
+
+void qapi_free_SpiceQueryMouseModeList(SpiceQueryMouseModeList * obj);
+
+struct SpiceInfo
+{
+ bool enabled;
+ bool migrated;
+ bool has_host;
+ char * host;
+ bool has_port;
+ int64_t port;
+ bool has_tls_port;
+ int64_t tls_port;
+ bool has_auth;
+ char * auth;
+ bool has_compiled_version;
+ char * compiled_version;
+ SpiceQueryMouseMode mouse_mode;
+ bool has_channels;
+ SpiceChannelList * channels;
+};
+
+void qapi_free_SpiceInfoList(SpiceInfoList * obj);
+void qapi_free_SpiceInfo(SpiceInfo * obj);
+
+struct BalloonInfo
+{
+ int64_t actual;
+};
+
+void qapi_free_BalloonInfoList(BalloonInfoList * obj);
+void qapi_free_BalloonInfo(BalloonInfo * obj);
+
+struct PciMemoryRange
+{
+ int64_t base;
+ int64_t limit;
+};
+
+void qapi_free_PciMemoryRangeList(PciMemoryRangeList * obj);
+void qapi_free_PciMemoryRange(PciMemoryRange * obj);
+
+struct PciMemoryRegion
+{
+ int64_t bar;
+ char * type;
+ int64_t address;
+ int64_t size;
+ bool has_prefetch;
+ bool prefetch;
+ bool has_mem_type_64;
+ bool mem_type_64;
+};
+
+void qapi_free_PciMemoryRegionList(PciMemoryRegionList * obj);
+void qapi_free_PciMemoryRegion(PciMemoryRegion * obj);
+
+struct PciBridgeInfo /* bridge record; mutually recursive with PciDeviceInfo (devices here, pci_bridge there) */
+{
+ struct /* untagged nested struct; reachable only through the `bus` member */
+ {
+ int64_t number;
+ int64_t secondary;
+ int64_t subordinate;
+ PciMemoryRange * io_range; /* the three ranges are pointers to separate PciMemoryRange objects */
+ PciMemoryRange * memory_range;
+ PciMemoryRange * prefetchable_range;
+ } bus; /* embedded by value */
+ bool has_devices; /* NOTE(review): presumably says whether `devices` is populated - confirm against the QAPI generator */
+ PciDeviceInfoList * devices; /* head of a PciDeviceInfo list */
+};
+
+void qapi_free_PciBridgeInfoList(PciBridgeInfoList * obj);
+void qapi_free_PciBridgeInfo(PciBridgeInfo * obj);
+
+struct PciDeviceInfo /* one PCI device; may point to a PciBridgeInfo, which in turn lists further devices */
+{
+ int64_t bus;
+ int64_t slot;
+ int64_t function;
+ struct /* untagged nested struct; reachable only through `class_info` */
+ {
+ bool has_desc; /* NOTE(review): presumably says whether `desc` is populated - confirm against the QAPI generator */
+ char * desc;
+ int64_t class; /* `class` is a C++ keyword, so this header cannot be included from C++ as written */
+ } class_info;
+ struct /* untagged nested struct; reachable only through `id` */
+ {
+ int64_t device;
+ int64_t vendor;
+ } id;
+ bool has_irq; /* NOTE(review): presumably says whether `irq` is populated - confirm */
+ int64_t irq;
+ char * qdev_id; /* unflagged string member */
+ bool has_pci_bridge; /* NOTE(review): presumably says whether `pci_bridge` is populated - confirm */
+ PciBridgeInfo * pci_bridge;
+ PciMemoryRegionList * regions; /* unflagged head of a PciMemoryRegion list */
+};
+
+void qapi_free_PciDeviceInfoList(PciDeviceInfoList * obj);
+void qapi_free_PciDeviceInfo(PciDeviceInfo * obj);
+
+struct PciInfo
+{
+ int64_t bus;
+ PciDeviceInfoList * devices;
+};
+
+void qapi_free_PciInfoList(PciInfoList * obj);
+void qapi_free_PciInfo(PciInfo * obj);
+
+void qapi_free_BlockdevOnErrorList(BlockdevOnErrorList * obj);
+
+void qapi_free_MirrorSyncModeList(MirrorSyncModeList * obj);
+
+struct BlockJobInfo
+{
+ char * type;
+ char * device;
+ int64_t len;
+ int64_t offset;
+ bool busy;
+ bool paused;
+ int64_t speed;
+ BlockDeviceIoStatus io_status;
+};
+
+void qapi_free_BlockJobInfoList(BlockJobInfoList * obj);
+void qapi_free_BlockJobInfo(BlockJobInfo * obj);
+
+void qapi_free_NewImageModeList(NewImageModeList * obj);
+
+struct BlockdevSnapshot
+{
+ char * device;
+ char * snapshot_file;
+ bool has_format;
+ char * format;
+ bool has_mode;
+ NewImageMode mode;
+};
+
+void qapi_free_BlockdevSnapshotList(BlockdevSnapshotList * obj);
+void qapi_free_BlockdevSnapshot(BlockdevSnapshot * obj);
+
+struct DriveBackup
+{
+ char * device;
+ char * target;
+ bool has_format;
+ char * format;
+ MirrorSyncMode sync;
+ bool has_mode;
+ NewImageMode mode;
+ bool has_speed;
+ int64_t speed;
+ bool has_on_source_error;
+ BlockdevOnError on_source_error;
+ bool has_on_target_error;
+ BlockdevOnError on_target_error;
+};
+
+void qapi_free_DriveBackupList(DriveBackupList * obj);
+void qapi_free_DriveBackup(DriveBackup * obj);
+
+struct Abort /* carries no data; used as the `abort` member type in TransactionAction's union */
+{ /* NOTE(review): a struct with no members is not ISO C; it is accepted as a GNU extension, where its size is 0 */
+};
+
+void qapi_free_AbortList(AbortList * obj);
+void qapi_free_Abort(Abort * obj);
+
+struct TransactionAction /* tagged union over the three transaction payload types */
+{
+ TransactionActionKind kind; /* NOTE(review): presumably tells which union member below is valid - confirm against the QAPI generator */
+ union { /* anonymous union (C11 or GNU extension); all members are pointers sharing one slot */
+ void *data; /* untyped view of whichever pointer is stored */
+ BlockdevSnapshot * blockdev_snapshot_sync;
+ DriveBackup * drive_backup;
+ Abort * abort; /* shares its name with the C library function abort(); legal as a member name */
+ };
+};
+void qapi_free_TransactionActionList(TransactionActionList * obj);
+void qapi_free_TransactionAction(TransactionAction * obj);
+
+struct ObjectPropertyInfo
+{
+ char * name;
+ char * type;
+};
+
+void qapi_free_ObjectPropertyInfoList(ObjectPropertyInfoList * obj);
+void qapi_free_ObjectPropertyInfo(ObjectPropertyInfo * obj);
+
+struct ObjectTypeInfo
+{
+ char * name;
+};
+
+void qapi_free_ObjectTypeInfoList(ObjectTypeInfoList * obj);
+void qapi_free_ObjectTypeInfo(ObjectTypeInfo * obj);
+
+struct DevicePropertyInfo
+{
+ char * name;
+ char * type;
+};
+
+void qapi_free_DevicePropertyInfoList(DevicePropertyInfoList * obj);
+void qapi_free_DevicePropertyInfo(DevicePropertyInfo * obj);
+
+struct NetdevNoneOptions /* carries no data; used as the `none` member type in NetClientOptions' union */
+{ /* NOTE(review): a struct with no members is not ISO C; it is accepted as a GNU extension, where its size is 0 */
+};
+
+void qapi_free_NetdevNoneOptionsList(NetdevNoneOptionsList * obj);
+void qapi_free_NetdevNoneOptions(NetdevNoneOptions * obj);
+
+struct NetLegacyNicOptions
+{
+ bool has_netdev;
+ char * netdev;
+ bool has_macaddr;
+ char * macaddr;
+ bool has_model;
+ char * model;
+ bool has_addr;
+ char * addr;
+ bool has_vectors;
+ uint32_t vectors;
+};
+
+void qapi_free_NetLegacyNicOptionsList(NetLegacyNicOptionsList * obj);
+void qapi_free_NetLegacyNicOptions(NetLegacyNicOptions * obj);
+
+struct String
+{
+ char * str;
+};
+
+void qapi_free_StringList(StringList * obj);
+void qapi_free_String(String * obj);
+
+struct NetdevUserOptions /* option set in which every member is paired with a has_ flag */
+{
+ bool has_hostname; /* NOTE(review): each has_X presumably says whether member X is populated - confirm against the QAPI generator */
+ char * hostname;
+ bool has_q_restrict;
+ bool q_restrict; /* NOTE(review): the q_ prefix presumably avoids the C99 keyword `restrict` - confirm */
+ bool has_ip;
+ char * ip;
+ bool has_net;
+ char * net;
+ bool has_host;
+ char * host;
+ bool has_tftp;
+ char * tftp;
+ bool has_bootfile;
+ char * bootfile;
+ bool has_dhcpstart;
+ char * dhcpstart;
+ bool has_dns;
+ char * dns;
+ bool has_dnssearch;
+ StringList * dnssearch; /* head of a String list */
+ bool has_smb;
+ char * smb;
+ bool has_smbserver;
+ char * smbserver;
+ bool has_hostfwd;
+ StringList * hostfwd; /* head of a String list */
+ bool has_guestfwd;
+ StringList * guestfwd; /* head of a String list */
+};
+
+void qapi_free_NetdevUserOptionsList(NetdevUserOptionsList * obj);
+void qapi_free_NetdevUserOptions(NetdevUserOptions * obj);
+
+struct NetdevTapOptions
+{
+ bool has_ifname;
+ char * ifname;
+ bool has_fd;
+ char * fd;
+ bool has_fds;
+ char * fds;
+ bool has_script;
+ char * script;
+ bool has_downscript;
+ char * downscript;
+ bool has_helper;
+ char * helper;
+ bool has_sndbuf;
+ uint64_t sndbuf;
+ bool has_vnet_hdr;
+ bool vnet_hdr;
+ bool has_vhost;
+ bool vhost;
+ bool has_vhostfd;
+ char * vhostfd;
+ bool has_vhostfds;
+ char * vhostfds;
+ bool has_vhostforce;
+ bool vhostforce;
+ bool has_queues;
+ uint32_t queues;
+};
+
+void qapi_free_NetdevTapOptionsList(NetdevTapOptionsList * obj);
+void qapi_free_NetdevTapOptions(NetdevTapOptions * obj);
+
+struct NetdevSocketOptions
+{
+ bool has_fd;
+ char * fd;
+ bool has_listen;
+ char * listen;
+ bool has_connect;
+ char * connect;
+ bool has_mcast;
+ char * mcast;
+ bool has_localaddr;
+ char * localaddr;
+ bool has_udp;
+ char * udp;
+};
+
+void qapi_free_NetdevSocketOptionsList(NetdevSocketOptionsList * obj);
+void qapi_free_NetdevSocketOptions(NetdevSocketOptions * obj);
+
+struct NetdevVdeOptions
+{
+ bool has_sock;
+ char * sock;
+ bool has_port;
+ uint16_t port;
+ bool has_group;
+ char * group;
+ bool has_mode;
+ uint16_t mode;
+};
+
+void qapi_free_NetdevVdeOptionsList(NetdevVdeOptionsList * obj);
+void qapi_free_NetdevVdeOptions(NetdevVdeOptions * obj);
+
+struct NetdevDumpOptions
+{
+ bool has_len;
+ uint64_t len;
+ bool has_file;
+ char * file;
+};
+
+void qapi_free_NetdevDumpOptionsList(NetdevDumpOptionsList * obj);
+void qapi_free_NetdevDumpOptions(NetdevDumpOptions * obj);
+
+struct NetdevBridgeOptions
+{
+ bool has_br;
+ char * br;
+ bool has_helper;
+ char * helper;
+};
+
+void qapi_free_NetdevBridgeOptionsList(NetdevBridgeOptionsList * obj);
+void qapi_free_NetdevBridgeOptions(NetdevBridgeOptions * obj);
+
+struct NetdevHubPortOptions
+{
+ int32_t hubid;
+};
+
+void qapi_free_NetdevHubPortOptionsList(NetdevHubPortOptionsList * obj);
+void qapi_free_NetdevHubPortOptions(NetdevHubPortOptions * obj);
+
+struct NetClientOptions /* tagged union over the per-backend network option structs */
+{
+ NetClientOptionsKind kind; /* NOTE(review): presumably tells which union member below is valid - confirm against the QAPI generator */
+ union { /* anonymous union (C11 or GNU extension); all members are pointers sharing one slot */
+ void *data; /* untyped view of whichever pointer is stored */
+ NetdevNoneOptions * none;
+ NetLegacyNicOptions * nic;
+ NetdevUserOptions * user;
+ NetdevTapOptions * tap;
+ NetdevSocketOptions * socket;
+ NetdevVdeOptions * vde;
+ NetdevDumpOptions * dump;
+ NetdevBridgeOptions * bridge;
+ NetdevHubPortOptions * hubport;
+ };
+};
+void qapi_free_NetClientOptionsList(NetClientOptionsList * obj);
+void qapi_free_NetClientOptions(NetClientOptions * obj);
+
+struct NetLegacy /* like struct Netdev (id + opts) but with flagged vlan/id/name members */
+{
+ bool has_vlan; /* NOTE(review): each has_X presumably says whether member X is populated - confirm against the QAPI generator */
+ int32_t vlan;
+ bool has_id;
+ char * id; /* flagged here, whereas struct Netdev declares `id` without a flag */
+ bool has_name;
+ char * name;
+ NetClientOptions * opts; /* unflagged pointer to the tagged option union */
+};
+
+void qapi_free_NetLegacyList(NetLegacyList * obj);
+void qapi_free_NetLegacy(NetLegacy * obj);
+
+struct Netdev
+{
+ char * id;
+ NetClientOptions * opts;
+};
+
+void qapi_free_NetdevList(NetdevList * obj);
+void qapi_free_Netdev(Netdev * obj);
+
+struct InetSocketAddress /* host/port pair plus flagged extras */
+{
+ char * host;
+ char * port; /* a string, not an integer */
+ bool has_to; /* NOTE(review): each has_X presumably says whether member X is populated - confirm against the QAPI generator */
+ uint16_t to; /* NOTE(review): meaning is not stated in this header; presumably the upper end of a port range - confirm */
+ bool has_ipv4;
+ bool ipv4;
+ bool has_ipv6;
+ bool ipv6;
+};
+
+void qapi_free_InetSocketAddressList(InetSocketAddressList * obj);
+void qapi_free_InetSocketAddress(InetSocketAddress * obj);
+
+struct UnixSocketAddress
+{
+ char * path;
+};
+
+void qapi_free_UnixSocketAddressList(UnixSocketAddressList * obj);
+void qapi_free_UnixSocketAddress(UnixSocketAddress * obj);
+
+struct SocketAddress /* tagged union over the inet, unix and fd address forms */
+{
+ SocketAddressKind kind; /* NOTE(review): presumably tells which union member below is valid - confirm against the QAPI generator */
+ union { /* anonymous union (C11 or GNU extension); all members are pointers sharing one slot */
+ void *data; /* untyped view of whichever pointer is stored */
+ InetSocketAddress * inet;
+ UnixSocketAddress * q_unix; /* NOTE(review): the q_ prefix presumably avoids the `unix` macro some compilers predefine - confirm */
+ String * fd; /* the fd form is carried as a String, not an integer */
+ };
+};
+void qapi_free_SocketAddressList(SocketAddressList * obj);
+void qapi_free_SocketAddress(SocketAddress * obj);
+
+struct MachineInfo
+{
+ char * name;
+ bool has_alias;
+ char * alias;
+ bool has_is_default;
+ bool is_default;
+ int64_t cpu_max;
+};
+
+void qapi_free_MachineInfoList(MachineInfoList * obj);
+void qapi_free_MachineInfo(MachineInfo * obj);
+
+struct CpuDefinitionInfo
+{
+ char * name;
+};
+
+void qapi_free_CpuDefinitionInfoList(CpuDefinitionInfoList * obj);
+void qapi_free_CpuDefinitionInfo(CpuDefinitionInfo * obj);
+
+struct AddfdInfo
+{
+ int64_t fdset_id;
+ int64_t fd;
+};
+
+void qapi_free_AddfdInfoList(AddfdInfoList * obj);
+void qapi_free_AddfdInfo(AddfdInfo * obj);
+
+struct FdsetFdInfo
+{
+ int64_t fd;
+ bool has_opaque;
+ char * opaque;
+};
+
+void qapi_free_FdsetFdInfoList(FdsetFdInfoList * obj);
+void qapi_free_FdsetFdInfo(FdsetFdInfo * obj);
+
+struct FdsetInfo
+{
+ int64_t fdset_id;
+ FdsetFdInfoList * fds;
+};
+
+void qapi_free_FdsetInfoList(FdsetInfoList * obj);
+void qapi_free_FdsetInfo(FdsetInfo * obj);
+
+struct TargetInfo
+{
+ char * arch;
+};
+
+void qapi_free_TargetInfoList(TargetInfoList * obj);
+void qapi_free_TargetInfo(TargetInfo * obj);
+
+void qapi_free_QKeyCodeList(QKeyCodeList * obj);
+
+struct KeyValue /* tagged union holding a key as a raw number or as a QKeyCode */
+{
+ KeyValueKind kind; /* NOTE(review): presumably tells which union member below is valid - confirm against the QAPI generator */
+ union { /* anonymous union (C11 or GNU extension); unlike the other tagged unions here, the payloads are stored by value */
+ void *data; /* only pointer-sized, so it does not cover all of `number` where pointers are 32 bits */
+ int64_t number;
+ QKeyCode qcode;
+ };
+};
+void qapi_free_KeyValueList(KeyValueList * obj);
+void qapi_free_KeyValue(KeyValue * obj);
+
+struct ChardevFile /* file-backed character device description: flagged `in`, unflagged `out` */
+{
+ bool has_in; /* NOTE(review): presumably says whether `in` is populated - confirm against the QAPI generator */
+ char * in;
+ char * out; /* no has_out flag accompanies this member */
+};
+
+void qapi_free_ChardevFileList(ChardevFileList * obj);
+void qapi_free_ChardevFile(ChardevFile * obj);
+
+struct ChardevHostdev
+{
+ char * device;
+};
+
+void qapi_free_ChardevHostdevList(ChardevHostdevList * obj);
+void qapi_free_ChardevHostdev(ChardevHostdev * obj);
+
+struct ChardevSocket /* socket character device description: one address plus four flagged booleans */
+{
+ SocketAddress * addr; /* unflagged pointer to the tagged address union */
+ bool has_server; /* NOTE(review): each has_X presumably says whether member X was supplied; the default used otherwise is not visible here - confirm */
+ bool server;
+ bool has_wait;
+ bool wait;
+ bool has_nodelay;
+ bool nodelay;
+ bool has_telnet;
+ bool telnet;
+};
+
+void qapi_free_ChardevSocketList(ChardevSocketList * obj);
+void qapi_free_ChardevSocket(ChardevSocket * obj);
+
+struct ChardevUdp /* UDP character device description: unflagged remote address, flagged local address */
+{
+ SocketAddress * remote;
+ bool has_local; /* NOTE(review): presumably says whether `local` is populated - confirm against the QAPI generator */
+ SocketAddress * local;
+};
+
+void qapi_free_ChardevUdpList(ChardevUdpList * obj);
+void qapi_free_ChardevUdp(ChardevUdp * obj);
+
+struct ChardevMux
+{
+ char * chardev;
+};
+
+void qapi_free_ChardevMuxList(ChardevMuxList * obj);
+void qapi_free_ChardevMux(ChardevMux * obj);
+
+struct ChardevStdio
+{
+ bool has_signal;
+ bool signal;
+};
+
+void qapi_free_ChardevStdioList(ChardevStdioList * obj);
+void qapi_free_ChardevStdio(ChardevStdio * obj);
+
+struct ChardevSpiceChannel
+{
+ char * type;
+};
+
+void qapi_free_ChardevSpiceChannelList(ChardevSpiceChannelList * obj);
+void qapi_free_ChardevSpiceChannel(ChardevSpiceChannel * obj);
+
+struct ChardevSpicePort
+{
+ char * fqdn;
+};
+
+void qapi_free_ChardevSpicePortList(ChardevSpicePortList * obj);
+void qapi_free_ChardevSpicePort(ChardevSpicePort * obj);
+
+struct ChardevVC
+{
+ bool has_width;
+ int64_t width;
+ bool has_height;
+ int64_t height;
+ bool has_cols;
+ int64_t cols;
+ bool has_rows;
+ int64_t rows;
+};
+
+void qapi_free_ChardevVCList(ChardevVCList * obj);
+void qapi_free_ChardevVC(ChardevVC * obj);
+
+struct ChardevMemory
+{
+ bool has_size;
+ int64_t size;
+};
+
+void qapi_free_ChardevMemoryList(ChardevMemoryList * obj);
+void qapi_free_ChardevMemory(ChardevMemory * obj);
+
+struct ChardevDummy /* carries no data; member type for pty, null, msmouse, braille and console in ChardevBackend's union */
+{ /* NOTE(review): a struct with no members is not ISO C; it is accepted as a GNU extension, where its size is 0 */
+};
+
+void qapi_free_ChardevDummyList(ChardevDummyList * obj);
+void qapi_free_ChardevDummy(ChardevDummy * obj);
+
+struct ChardevBackend /* tagged union over every character device backend description */
+{
+ ChardevBackendKind kind; /* NOTE(review): presumably tells which union member below is valid - confirm against the QAPI generator */
+ union { /* anonymous union (C11 or GNU extension); members appear in the same order as the ChardevBackendKind enumerators */
+ void *data; /* untyped view of whichever pointer is stored */
+ ChardevFile * file;
+ ChardevHostdev * serial; /* serial, parallel and pipe all use ChardevHostdev */
+ ChardevHostdev * parallel;
+ ChardevHostdev * pipe;
+ ChardevSocket * socket;
+ ChardevUdp * udp;
+ ChardevDummy * pty; /* pty, null, msmouse, braille and console use the memberless ChardevDummy */
+ ChardevDummy * null;
+ ChardevMux * mux;
+ ChardevDummy * msmouse;
+ ChardevDummy * braille;
+ ChardevStdio * stdio;
+ ChardevDummy * console;
+ ChardevSpiceChannel * spicevmc;
+ ChardevSpicePort * spiceport;
+ ChardevVC * vc;
+ ChardevMemory * memory;
+ };
+};
+void qapi_free_ChardevBackendList(ChardevBackendList * obj);
+void qapi_free_ChardevBackend(ChardevBackend * obj);
+
+struct ChardevReturn
+{
+ bool has_pty;
+ char * pty;
+};
+
+void qapi_free_ChardevReturnList(ChardevReturnList * obj);
+void qapi_free_ChardevReturn(ChardevReturn * obj);
+
+void qapi_free_TpmModelList(TpmModelList * obj);
+
+void qapi_free_TpmTypeList(TpmTypeList * obj);
+
+struct TPMPassthroughOptions
+{
+ bool has_path;
+ char * path;
+ bool has_cancel_path;
+ char * cancel_path;
+};
+
+void qapi_free_TPMPassthroughOptionsList(TPMPassthroughOptionsList * obj);
+void qapi_free_TPMPassthroughOptions(TPMPassthroughOptions * obj);
+
+struct TpmTypeOptions /* tagged union with a single payload type */
+{
+ TpmTypeOptionsKind kind; /* NOTE(review): presumably tells which union member below is valid - confirm against the QAPI generator */
+ union { /* anonymous union (C11 or GNU extension) */
+ void *data; /* untyped view of the stored pointer */
+ TPMPassthroughOptions * passthrough;
+ };
+};
+void qapi_free_TpmTypeOptionsList(TpmTypeOptionsList * obj);
+void qapi_free_TpmTypeOptions(TpmTypeOptions * obj);
+
+struct TPMInfo /* one TPM entry: identifier, model and backend options; no member is flagged */
+{
+ char * id;
+ TpmModel model; /* enum stored by value */
+ TpmTypeOptions * options; /* pointer to the tagged option union */
+};
+
+void qapi_free_TPMInfoList(TPMInfoList * obj);
+void qapi_free_TPMInfo(TPMInfo * obj);
+
+struct AcpiTableOptions /* ACPI table option set in which every member is paired with a has_ flag */
+{
+ bool has_sig; /* NOTE(review): each has_X presumably says whether member X is populated - confirm against the QAPI generator */
+ char * sig;
+ bool has_rev;
+ uint8_t rev; /* 8-bit, narrower than the 32-bit revision members below */
+ bool has_oem_id;
+ char * oem_id;
+ bool has_oem_table_id;
+ char * oem_table_id;
+ bool has_oem_rev;
+ uint32_t oem_rev;
+ bool has_asl_compiler_id;
+ char * asl_compiler_id;
+ bool has_asl_compiler_rev;
+ uint32_t asl_compiler_rev;
+ bool has_file;
+ char * file; /* NOTE(review): whether `file` and `data` may both be set is not visible here - confirm */
+ bool has_data;
+ char * data;
+};
+
+void qapi_free_AcpiTableOptionsList(AcpiTableOptionsList * obj);
+void qapi_free_AcpiTableOptions(AcpiTableOptions * obj);
+
+void qapi_free_CommandLineParameterTypeList(CommandLineParameterTypeList * obj);
+
+struct CommandLineParameterInfo
+{
+ char * name;
+ CommandLineParameterType type;
+ bool has_help;
+ char * help;
+};
+
+void qapi_free_CommandLineParameterInfoList(CommandLineParameterInfoList * obj);
+void qapi_free_CommandLineParameterInfo(CommandLineParameterInfo * obj);
+
+struct CommandLineOptionInfo
+{
+ char * option;
+ CommandLineParameterInfoList * parameters;
+};
+
+void qapi_free_CommandLineOptionInfoList(CommandLineOptionInfoList * obj);
+void qapi_free_CommandLineOptionInfo(CommandLineOptionInfo * obj);
+
+void qapi_free_X86CPURegister32List(X86CPURegister32List * obj);
+
+struct X86CPUFeatureWordInfo /* one feature word identified by CPUID input values and an output register */
+{
+ int64_t cpuid_input_eax;
+ bool has_cpuid_input_ecx; /* NOTE(review): presumably says whether cpuid_input_ecx is populated - confirm against the QAPI generator */
+ int64_t cpuid_input_ecx;
+ X86CPURegister32 cpuid_register; /* enum stored by value; its numbering is not the hardware register encoding */
+ int64_t features; /* NOTE(review): presumably a bit mask held in a signed 64-bit member - confirm how the top bit is treated */
+};
+
+void qapi_free_X86CPUFeatureWordInfoList(X86CPUFeatureWordInfoList * obj);
+void qapi_free_X86CPUFeatureWordInfo(X86CPUFeatureWordInfo * obj);
+
+void qapi_free_RxStateList(RxStateList * obj);
+
+struct RxFilterInfo /* receive-filter snapshot for one named entity; no member is flagged */
+{
+ char * name;
+ bool promiscuous;
+ RxState multicast; /* enum stored by value */
+ RxState unicast; /* enum stored by value */
+ bool broadcast_allowed;
+ bool multicast_overflow;
+ bool unicast_overflow;
+ char * main_mac; /* a string, not raw bytes */
+ intList * vlan_table; /* head of a built-in int list */
+ strList * unicast_table; /* head of a built-in string list */
+ strList * multicast_table; /* head of a built-in string list */
+};
+
+void qapi_free_RxFilterInfoList(RxFilterInfoList * obj);
+void qapi_free_RxFilterInfo(RxFilterInfo * obj);
+
+#endif
diff --git a/contrib/qemu/qemu-coroutine-lock.c b/contrib/qemu/qemu-coroutine-lock.c
new file mode 100644
index 000000000..d9fea4989
--- /dev/null
+++ b/contrib/qemu/qemu-coroutine-lock.c
@@ -0,0 +1,178 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/coroutine.h"
+#include "block/coroutine_int.h"
+#include "qemu/queue.h"
+#include "trace.h"
+
+void qemu_co_queue_init(CoQueue *queue) /* initialise @queue's entry list via QTAILQ_INIT */
+{
+ QTAILQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+    Coroutine *waiter = qemu_coroutine_self();                  /* the coroutine making this call */
+    QTAILQ_INSERT_TAIL(&queue->entries, waiter, co_queue_next); /* join at the back of the queue */
+    qemu_coroutine_yield();                                     /* give control back to our caller */
+    assert(qemu_in_coroutine());                                /* checked once execution continues here */
+}
+
+void coroutine_fn qemu_co_queue_wait_insert_head(CoQueue *queue)
+{
+    Coroutine *waiter = qemu_coroutine_self();                  /* the coroutine making this call */
+    QTAILQ_INSERT_HEAD(&queue->entries, waiter, co_queue_next); /* jump to the front of the queue */
+    qemu_coroutine_yield();                                     /* give control back to our caller */
+    assert(qemu_in_coroutine());                                /* checked once execution continues here */
+}
+
+/**
+ * qemu_co_queue_run_restart:
+ *
+ * Enter each coroutine that was previously marked for restart by
+ * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is
+ * invoked by the core coroutine code when the current coroutine yields or
+ * terminates.
+ */
+void qemu_co_queue_run_restart(Coroutine *co)
+{
+    Coroutine *woken;   /* current head of co's wake-up list */
+
+    trace_qemu_co_queue_run_restart(co);
+    while ((woken = QTAILQ_FIRST(&co->co_queue_wakeup)) != NULL) {
+        QTAILQ_REMOVE(&co->co_queue_wakeup, woken, co_queue_next);  /* unlink before entering it */
+        qemu_coroutine_enter(woken, NULL);
+    }
+}
+
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
+{
+    Coroutine *caller = qemu_coroutine_self();
+    Coroutine *moved;
+
+    /* Nothing is waiting: report that no coroutine was scheduled. */
+    if (QTAILQ_EMPTY(&queue->entries)) {
+        return false;
+    }
+
+    /* The queue is non-empty here, so the first pass always finds a head. */
+    do {
+        moved = QTAILQ_FIRST(&queue->entries);
+        QTAILQ_REMOVE(&queue->entries, moved, co_queue_next);
+        QTAILQ_INSERT_TAIL(&caller->co_queue_wakeup, moved, co_queue_next);
+        trace_qemu_co_queue_next(moved);
+    } while (!single && !QTAILQ_EMPTY(&queue->entries));
+    return true;
+}
+
+bool qemu_co_queue_next(CoQueue *queue) /* move at most one waiter to the caller's wake-up list; false if none waited */
+{
+ return qemu_co_queue_do_restart(queue, true);
+}
+
+void qemu_co_queue_restart_all(CoQueue *queue) /* move every waiter to the caller's wake-up list */
+{
+ qemu_co_queue_do_restart(queue, false);
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+    return QTAILQ_EMPTY(&queue->entries);   /* true when no coroutine is queued */
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+    memset(mutex, 0, sizeof *mutex);        /* zero every field first ... */
+    qemu_co_queue_init(&mutex->queue);      /* ... then set up the embedded wait queue */
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+    Coroutine *requester = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_lock_entry(mutex, requester);
+
+    /* A loop, not an `if`: the flag is tested again after every wake-up. */
+    while (mutex->locked) {
+        qemu_co_queue_wait(&mutex->queue);
+    }
+    mutex->locked = true;
+
+    trace_qemu_co_mutex_lock_return(mutex, requester);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+    Coroutine *releaser = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_unlock_entry(mutex, releaser);
+
+    assert(mutex->locked == true);      /* the mutex must currently be held */
+    assert(qemu_in_coroutine());
+
+    mutex->locked = false;
+    qemu_co_queue_next(&mutex->queue);  /* schedule at most one waiter */
+
+    trace_qemu_co_mutex_unlock_return(mutex, releaser);
+}
+
+void qemu_co_rwlock_init(CoRwlock *lock)
+{
+    memset(lock, 0, sizeof *lock);          /* zero every field first ... */
+    qemu_co_queue_init(&lock->queue);       /* ... then set up the embedded wait queue */
+}
+
+void qemu_co_rwlock_rdlock(CoRwlock *lock)
+{
+    while (lock->writer) {      /* readers wait only while the writer flag is set */
+        qemu_co_queue_wait(&lock->queue);
+    }
+    lock->reader += 1;          /* one more active reader */
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+    assert(qemu_in_coroutine());
+    if (!lock->writer) {
+        /* Reader release: drop one reference. */
+        lock->reader -= 1;
+        assert(lock->reader >= 0);
+        if (lock->reader == 0) {
+            qemu_co_queue_next(&lock->queue);       /* last reader out wakes a single waiter */
+        }
+    } else {
+        lock->writer = false;
+        qemu_co_queue_restart_all(&lock->queue);    /* writer release wakes every waiter */
+    }
+}
+
+void qemu_co_rwlock_wrlock(CoRwlock *lock)
+{
+    while (lock->reader || lock->writer) {  /* wait until no reader and no writer remains */
+        qemu_co_queue_wait(&lock->queue);
+    }
+    lock->writer = true;
+}
diff --git a/contrib/qemu/qemu-coroutine-sleep.c b/contrib/qemu/qemu-coroutine-sleep.c
new file mode 100644
index 000000000..169ce5ccc
--- /dev/null
+++ b/contrib/qemu/qemu-coroutine-sleep.c
@@ -0,0 +1,39 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "block/coroutine.h"
+#include "qemu/timer.h"
+
+typedef struct CoSleepCB { /* state shared between co_sleep_ns() and its timer callback */
+ QEMUTimer *ts; /* timer created, armed and freed by co_sleep_ns() */
+ Coroutine *co; /* coroutine that co_sleep_cb() re-enters */
+} CoSleepCB;
+
+static void co_sleep_cb(void *opaque)
+{
+    CoSleepCB *pending = opaque;    /* the record co_sleep_ns() handed to qemu_new_timer() */
+
+    qemu_coroutine_enter(pending->co, NULL);
+}
+
+void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns)
+{
+    /* Lives on this coroutine's stack; the timer callback receives its address. */
+    CoSleepCB sleep_cb = { .co = qemu_coroutine_self() };
+
+    sleep_cb.ts = qemu_new_timer(clock, SCALE_NS, co_sleep_cb, &sleep_cb);
+    qemu_mod_timer(sleep_cb.ts, qemu_get_clock_ns(clock) + ns);
+    qemu_coroutine_yield();         /* execution continues here after being re-entered */
+    qemu_del_timer(sleep_cb.ts);
+    qemu_free_timer(sleep_cb.ts);
+}
diff --git a/contrib/qemu/qemu-coroutine.c b/contrib/qemu/qemu-coroutine.c
new file mode 100644
index 000000000..5e19307ee
--- /dev/null
+++ b/contrib/qemu/qemu-coroutine.c
@@ -0,0 +1,135 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "block/coroutine.h"
+#include "block/coroutine_int.h"
+
+enum {
+ /* Maximum free pool size prevents holding too many freed coroutines */
+ POOL_MAX_SIZE = 0, /* with 0, `pool_size < POOL_MAX_SIZE` in coroutine_delete() is never true, so nothing is pooled */
+};
+
+/** Free list to speed up creation */
+static QemuMutex pool_lock; /* held around pool/pool_size accesses in qemu_coroutine_create() and coroutine_delete() */
+static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int pool_size; /* adjusted together with each insert into / removal from pool under pool_lock */
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+    Coroutine *fresh;
+
+    /* Try to recycle a pooled coroutine; the free list is guarded by pool_lock. */
+    qemu_mutex_lock(&pool_lock);
+    fresh = QSLIST_FIRST(&pool);
+    if (fresh != NULL) {
+        QSLIST_REMOVE_HEAD(&pool, pool_next);
+        pool_size -= 1;
+    }
+    qemu_mutex_unlock(&pool_lock);
+
+    if (fresh == NULL) {
+        fresh = qemu_coroutine_new();   /* pool was empty: build a new one */
+    }
+    fresh->entry = entry;
+    QTAILQ_INIT(&fresh->co_queue_wakeup);
+    return fresh;
+}
+
+static void coroutine_delete(Coroutine *co)
+{
+    qemu_mutex_lock(&pool_lock);
+    if (POOL_MAX_SIZE > pool_size) {        /* room left: keep it for reuse */
+        co->caller = NULL;
+        QSLIST_INSERT_HEAD(&pool, co, pool_next);
+        pool_size += 1;
+        qemu_mutex_unlock(&pool_lock);
+        return;
+    }
+    qemu_mutex_unlock(&pool_lock);
+
+    qemu_coroutine_delete(co);              /* pool is full: destroy it instead */
+}
+
+static void __attribute__((constructor)) coroutine_pool_init(void) /* constructor attribute: runs automatically at load time */
+{
+ qemu_mutex_init(&pool_lock);
+}
+
+static void __attribute__((destructor)) coroutine_pool_cleanup(void)
+{
+    Coroutine *co;
+
+    /* Pop and destroy pooled coroutines until the free list is empty. */
+    while ((co = QSLIST_FIRST(&pool)) != NULL) {
+        QSLIST_REMOVE_HEAD(&pool, pool_next);
+        qemu_coroutine_delete(co);
+    }
+
+    qemu_mutex_destroy(&pool_lock);
+}
+
+static void coroutine_swap(Coroutine *from, Coroutine *to)
+{
+    CoroutineAction action = qemu_coroutine_switch(from, to, COROUTINE_YIELD);
+
+    /* Runs once the switch call has returned, whatever action it reported. */
+    qemu_co_queue_run_restart(to);
+
+    if (action == COROUTINE_YIELD) {
+        return;
+    }
+
+    if (action != COROUTINE_TERMINATE) {
+        abort();    /* any other action value is treated as fatal */
+    }
+
+    /* TERMINATE: hand @to over to coroutine_delete(). */
+    trace_qemu_coroutine_terminate(to);
+    coroutine_delete(to);
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+    Coroutine *parent = qemu_coroutine_self();
+
+    trace_qemu_coroutine_enter(parent, co, opaque);
+
+    /* A caller is recorded on every enter and cleared on yield; still set means re-entry. */
+    if (co->caller != NULL) {
+        fprintf(stderr, "Co-routine re-entered recursively\n");
+        abort();
+    }
+    co->entry_arg = opaque;
+    co->caller = parent;
+    coroutine_swap(parent, co);
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+    Coroutine *current = qemu_coroutine_self();
+    Coroutine *resume = current->caller;    /* recorded by qemu_coroutine_enter() */
+
+    trace_qemu_coroutine_yield(current, resume);
+
+    if (resume == NULL) {
+        fprintf(stderr, "Co-routine is yielding to no one\n");
+        abort();
+    }
+    /* Clear the link before switching so a later enter is accepted again. */
+    current->caller = NULL;
+    coroutine_swap(current, resume);
+}
diff --git a/contrib/qemu/qmp-commands.h b/contrib/qemu/qmp-commands.h
new file mode 100644
index 000000000..fcc0ff0f7
--- /dev/null
+++ b/contrib/qemu/qmp-commands.h
@@ -0,0 +1,204 @@
+/* THIS FILE IS AUTOMATICALLY GENERATED, DO NOT MODIFY */
+
+/*
+ * schema-defined QAPI function prototypes
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QMP_COMMANDS_H
+#define QMP_COMMANDS_H
+
+#include "qapi-types.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
+
+void qmp_add_client(const char * protocol, const char * fdname, bool has_skipauth, bool skipauth, bool has_tls, bool tls, Error **errp);
+int qmp_marshal_input_add_client(Monitor *mon, const QDict *qdict, QObject **ret);
+NameInfo * qmp_query_name(Error **errp);
+int qmp_marshal_input_query_name(Monitor *mon, const QDict *qdict, QObject **ret);
+VersionInfo * qmp_query_version(Error **errp);
+int qmp_marshal_input_query_version(Monitor *mon, const QDict *qdict, QObject **ret);
+KvmInfo * qmp_query_kvm(Error **errp);
+int qmp_marshal_input_query_kvm(Monitor *mon, const QDict *qdict, QObject **ret);
+StatusInfo * qmp_query_status(Error **errp);
+int qmp_marshal_input_query_status(Monitor *mon, const QDict *qdict, QObject **ret);
+UuidInfo * qmp_query_uuid(Error **errp);
+int qmp_marshal_input_query_uuid(Monitor *mon, const QDict *qdict, QObject **ret);
+ChardevInfoList * qmp_query_chardev(Error **errp);
+int qmp_marshal_input_query_chardev(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_ringbuf_write(const char * device, const char * data, bool has_format, DataFormat format, Error **errp);
+int qmp_marshal_input_ringbuf_write(Monitor *mon, const QDict *qdict, QObject **ret);
+char * qmp_ringbuf_read(const char * device, int64_t size, bool has_format, DataFormat format, Error **errp);
+int qmp_marshal_input_ringbuf_read(Monitor *mon, const QDict *qdict, QObject **ret);
+CommandInfoList * qmp_query_commands(Error **errp);
+int qmp_marshal_input_query_commands(Monitor *mon, const QDict *qdict, QObject **ret);
+EventInfoList * qmp_query_events(Error **errp);
+int qmp_marshal_input_query_events(Monitor *mon, const QDict *qdict, QObject **ret);
+MigrationInfo * qmp_query_migrate(Error **errp);
+int qmp_marshal_input_query_migrate(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_capabilities(MigrationCapabilityStatusList * capabilities, Error **errp);
+int qmp_marshal_input_migrate_set_capabilities(Monitor *mon, const QDict *qdict, QObject **ret);
+MigrationCapabilityStatusList * qmp_query_migrate_capabilities(Error **errp);
+int qmp_marshal_input_query_migrate_capabilities(Monitor *mon, const QDict *qdict, QObject **ret);
+MouseInfoList * qmp_query_mice(Error **errp);
+int qmp_marshal_input_query_mice(Monitor *mon, const QDict *qdict, QObject **ret);
+CpuInfoList * qmp_query_cpus(Error **errp);
+int qmp_marshal_input_query_cpus(Monitor *mon, const QDict *qdict, QObject **ret);
+BlockInfoList * qmp_query_block(Error **errp);
+int qmp_marshal_input_query_block(Monitor *mon, const QDict *qdict, QObject **ret);
+BlockStatsList * qmp_query_blockstats(Error **errp);
+int qmp_marshal_input_query_blockstats(Monitor *mon, const QDict *qdict, QObject **ret);
+VncInfo * qmp_query_vnc(Error **errp);
+int qmp_marshal_input_query_vnc(Monitor *mon, const QDict *qdict, QObject **ret);
+SpiceInfo * qmp_query_spice(Error **errp);
+int qmp_marshal_input_query_spice(Monitor *mon, const QDict *qdict, QObject **ret);
+BalloonInfo * qmp_query_balloon(Error **errp);
+int qmp_marshal_input_query_balloon(Monitor *mon, const QDict *qdict, QObject **ret);
+PciInfoList * qmp_query_pci(Error **errp);
+int qmp_marshal_input_query_pci(Monitor *mon, const QDict *qdict, QObject **ret);
+BlockJobInfoList * qmp_query_block_jobs(Error **errp);
+int qmp_marshal_input_query_block_jobs(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_quit(Error **errp);
+int qmp_marshal_input_quit(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_stop(Error **errp);
+int qmp_marshal_input_stop(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_system_reset(Error **errp);
+int qmp_marshal_input_system_reset(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_system_powerdown(Error **errp);
+int qmp_marshal_input_system_powerdown(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_cpu(int64_t index, Error **errp);
+int qmp_marshal_input_cpu(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_cpu_add(int64_t id, Error **errp);
+int qmp_marshal_input_cpu_add(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_memsave(int64_t val, int64_t size, const char * filename, bool has_cpu_index, int64_t cpu_index, Error **errp);
+int qmp_marshal_input_memsave(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_pmemsave(int64_t val, int64_t size, const char * filename, Error **errp);
+int qmp_marshal_input_pmemsave(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_cont(Error **errp);
+int qmp_marshal_input_cont(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_system_wakeup(Error **errp);
+int qmp_marshal_input_system_wakeup(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_inject_nmi(Error **errp);
+int qmp_marshal_input_inject_nmi(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_set_link(const char * name, bool up, Error **errp);
+int qmp_marshal_input_set_link(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_passwd(const char * device, const char * password, Error **errp);
+int qmp_marshal_input_block_passwd(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_balloon(int64_t value, Error **errp);
+int qmp_marshal_input_balloon(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_resize(const char * device, int64_t size, Error **errp);
+int qmp_marshal_input_block_resize(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_transaction(TransactionActionList * actions, Error **errp);
+int qmp_marshal_input_transaction(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_blockdev_snapshot_sync(const char * device, const char * snapshot_file, bool has_format, const char * format, bool has_mode, NewImageMode mode, Error **errp);
+int qmp_marshal_input_blockdev_snapshot_sync(Monitor *mon, const QDict *qdict, QObject **ret);
+char * qmp_human_monitor_command(const char * command_line, bool has_cpu_index, int64_t cpu_index, Error **errp);
+int qmp_marshal_input_human_monitor_command(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_commit(const char * device, bool has_base, const char * base, const char * top, bool has_speed, int64_t speed, Error **errp);
+int qmp_marshal_input_block_commit(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_drive_backup(const char * device, const char * target, bool has_format, const char * format, MirrorSyncMode sync, bool has_mode, NewImageMode mode, bool has_speed, int64_t speed, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp);
+int qmp_marshal_input_drive_backup(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_drive_mirror(const char * device, const char * target, bool has_format, const char * format, MirrorSyncMode sync, bool has_mode, NewImageMode mode, bool has_speed, int64_t speed, bool has_granularity, uint32_t granularity, bool has_buf_size, int64_t buf_size, bool has_on_source_error, BlockdevOnError on_source_error, bool has_on_target_error, BlockdevOnError on_target_error, Error **errp);
+int qmp_marshal_input_drive_mirror(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_cancel(Error **errp);
+int qmp_marshal_input_migrate_cancel(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_downtime(double value, Error **errp);
+int qmp_marshal_input_migrate_set_downtime(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_speed(int64_t value, Error **errp);
+int qmp_marshal_input_migrate_set_speed(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate_set_cache_size(int64_t value, Error **errp);
+int qmp_marshal_input_migrate_set_cache_size(Monitor *mon, const QDict *qdict, QObject **ret);
+int64_t qmp_query_migrate_cache_size(Error **errp);
+int qmp_marshal_input_query_migrate_cache_size(Monitor *mon, const QDict *qdict, QObject **ret);
+ObjectPropertyInfoList * qmp_qom_list(const char * path, Error **errp);
+int qmp_marshal_input_qom_list(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_set_password(const char * protocol, const char * password, bool has_connected, const char * connected, Error **errp);
+int qmp_marshal_input_set_password(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_expire_password(const char * protocol, const char * time, Error **errp);
+int qmp_marshal_input_expire_password(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_eject(const char * device, bool has_force, bool force, Error **errp);
+int qmp_marshal_input_eject(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_change_vnc_password(const char * password, Error **errp);
+int qmp_marshal_input_change_vnc_password(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_change(const char * device, const char * target, bool has_arg, const char * arg, Error **errp);
+int qmp_marshal_input_change(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_set_io_throttle(const char * device, int64_t bps, int64_t bps_rd, int64_t bps_wr, int64_t iops, int64_t iops_rd, int64_t iops_wr, Error **errp);
+int qmp_marshal_input_block_set_io_throttle(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_stream(const char * device, bool has_base, const char * base, bool has_speed, int64_t speed, bool has_on_error, BlockdevOnError on_error, Error **errp);
+int qmp_marshal_input_block_stream(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_set_speed(const char * device, int64_t speed, Error **errp);
+int qmp_marshal_input_block_job_set_speed(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_cancel(const char * device, bool has_force, bool force, Error **errp);
+int qmp_marshal_input_block_job_cancel(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_pause(const char * device, Error **errp);
+int qmp_marshal_input_block_job_pause(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_resume(const char * device, Error **errp);
+int qmp_marshal_input_block_job_resume(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_block_job_complete(const char * device, Error **errp);
+int qmp_marshal_input_block_job_complete(Monitor *mon, const QDict *qdict, QObject **ret);
+ObjectTypeInfoList * qmp_qom_list_types(bool has_implements, const char * implements, bool has_abstract, bool abstract, Error **errp);
+int qmp_marshal_input_qom_list_types(Monitor *mon, const QDict *qdict, QObject **ret);
+DevicePropertyInfoList * qmp_device_list_properties(const char * typename, Error **errp);
+int qmp_marshal_input_device_list_properties(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_migrate(const char * uri, bool has_blk, bool blk, bool has_inc, bool inc, bool has_detach, bool detach, Error **errp);
+int qmp_marshal_input_migrate(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_xen_save_devices_state(const char * filename, Error **errp);
+int qmp_marshal_input_xen_save_devices_state(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_xen_set_global_dirty_log(bool enable, Error **errp);
+int qmp_marshal_input_xen_set_global_dirty_log(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_device_del(const char * id, Error **errp);
+int qmp_marshal_input_device_del(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_dump_guest_memory(bool paging, const char * protocol, bool has_begin, int64_t begin, bool has_length, int64_t length, Error **errp);
+int qmp_marshal_input_dump_guest_memory(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_netdev_del(const char * id, Error **errp);
+int qmp_marshal_input_netdev_del(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_getfd(const char * fdname, Error **errp);
+int qmp_marshal_input_getfd(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_closefd(const char * fdname, Error **errp);
+int qmp_marshal_input_closefd(Monitor *mon, const QDict *qdict, QObject **ret);
+MachineInfoList * qmp_query_machines(Error **errp);
+int qmp_marshal_input_query_machines(Monitor *mon, const QDict *qdict, QObject **ret);
+CpuDefinitionInfoList * qmp_query_cpu_definitions(Error **errp);
+int qmp_marshal_input_query_cpu_definitions(Monitor *mon, const QDict *qdict, QObject **ret);
+AddfdInfo * qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, const char * opaque, Error **errp);
+int qmp_marshal_input_add_fd(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t fd, Error **errp);
+int qmp_marshal_input_remove_fd(Monitor *mon, const QDict *qdict, QObject **ret);
+FdsetInfoList * qmp_query_fdsets(Error **errp);
+int qmp_marshal_input_query_fdsets(Monitor *mon, const QDict *qdict, QObject **ret);
+TargetInfo * qmp_query_target(Error **errp);
+int qmp_marshal_input_query_target(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_send_key(KeyValueList * keys, bool has_hold_time, int64_t hold_time, Error **errp);
+int qmp_marshal_input_send_key(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_screendump(const char * filename, Error **errp);
+int qmp_marshal_input_screendump(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_nbd_server_start(SocketAddress * addr, Error **errp);
+int qmp_marshal_input_nbd_server_start(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_nbd_server_add(const char * device, bool has_writable, bool writable, Error **errp);
+int qmp_marshal_input_nbd_server_add(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_nbd_server_stop(Error **errp);
+int qmp_marshal_input_nbd_server_stop(Monitor *mon, const QDict *qdict, QObject **ret);
+ChardevReturn * qmp_chardev_add(const char * id, ChardevBackend * backend, Error **errp);
+int qmp_marshal_input_chardev_add(Monitor *mon, const QDict *qdict, QObject **ret);
+void qmp_chardev_remove(const char * id, Error **errp);
+int qmp_marshal_input_chardev_remove(Monitor *mon, const QDict *qdict, QObject **ret);
+TpmModelList * qmp_query_tpm_models(Error **errp);
+int qmp_marshal_input_query_tpm_models(Monitor *mon, const QDict *qdict, QObject **ret);
+TpmTypeList * qmp_query_tpm_types(Error **errp);
+int qmp_marshal_input_query_tpm_types(Monitor *mon, const QDict *qdict, QObject **ret);
+TPMInfoList * qmp_query_tpm(Error **errp);
+int qmp_marshal_input_query_tpm(Monitor *mon, const QDict *qdict, QObject **ret);
+CommandLineOptionInfoList * qmp_query_command_line_options(bool has_option, const char * option, Error **errp);
+int qmp_marshal_input_query_command_line_options(Monitor *mon, const QDict *qdict, QObject **ret);
+RxFilterInfoList * qmp_query_rx_filter(bool has_name, const char * name, Error **errp);
+int qmp_marshal_input_query_rx_filter(Monitor *mon, const QDict *qdict, QObject **ret);
+
+#endif
diff --git a/contrib/qemu/qobject/json-lexer.c b/contrib/qemu/qobject/json-lexer.c
new file mode 100644
index 000000000..440df6039
--- /dev/null
+++ b/contrib/qemu/qobject/json-lexer.c
@@ -0,0 +1,373 @@
+/*
+ * JSON lexer
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
+#include "qemu-common.h"
+#include "qapi/qmp/json-lexer.h"
+
+#define MAX_TOKEN_SIZE (64ULL << 20) /* 64 MiB: token length beyond which json_lexer_feed_char() force-emits */
+
+/*
+ * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
+ * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
+ * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
+ * [{}\[\],:]
+ * [a-z]+
+ *
+ */
+
+enum json_lexer_state { /* values index the rows of json_lexer[]; do not reorder casually */
+ IN_ERROR = 0, /* must stay 0: table entries without an initializer are zero and so mean IN_ERROR */
+ IN_DQ_UCODE3, /* double-quoted string: one hex digit of \uXXXX still expected */
+ IN_DQ_UCODE2,
+ IN_DQ_UCODE1,
+ IN_DQ_UCODE0, /* just saw \u: four hex digits expected */
+ IN_DQ_STRING_ESCAPE, /* just saw a backslash inside a double-quoted string */
+ IN_DQ_STRING,
+ IN_SQ_UCODE3, /* single-quoted counterparts of the IN_DQ_* states */
+ IN_SQ_UCODE2,
+ IN_SQ_UCODE1,
+ IN_SQ_UCODE0,
+ IN_SQ_STRING_ESCAPE,
+ IN_SQ_STRING,
+ IN_ZERO, /* a leading '0' was read */
+ IN_DIGITS, /* exponent digits */
+ IN_DIGIT, /* after an exponent sign: at least one digit required */
+ IN_EXP_E, /* just saw 'e' or 'E' */
+ IN_MANTISSA, /* just saw '.': at least one digit required */
+ IN_MANTISSA_DIGITS,
+ IN_NONZERO_NUMBER,
+ IN_NEG_NONZERO_NUMBER, /* just saw '-' */
+ IN_KEYWORD,
+ IN_ESCAPE, /* just saw '%' */
+ IN_ESCAPE_L,
+ IN_ESCAPE_LL,
+ IN_ESCAPE_I,
+ IN_ESCAPE_I6,
+ IN_ESCAPE_I64,
+ IN_WHITESPACE,
+ IN_START, /* initial state, and the state restored after every emitted token */
+};
+
+#define TERMINAL(state) [0 ... 0x7F] = (state) /* range designator: pre-fills entries 0..0x7F of a row with @state */
+
+/* Return whether TERMINAL is a terminal state and the transition to it
+ from OLD_STATE required lookahead. This happens whenever the table
+ below uses the TERMINAL macro (detected by probing column 0 of OLD_STATE's row). */
+#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
+ (json_lexer[(old_state)][0] == (terminal))
+
+static const uint8_t json_lexer[][256] = { /* [current state][input byte] -> next state or JSON_* token; unset = IN_ERROR */
+    /* double quote string */
+    [IN_DQ_UCODE3] = {
+        ['0' ... '9'] = IN_DQ_STRING,
+        ['a' ... 'f'] = IN_DQ_STRING,
+        ['A' ... 'F'] = IN_DQ_STRING,
+    },
+    [IN_DQ_UCODE2] = {
+        ['0' ... '9'] = IN_DQ_UCODE3,
+        ['a' ... 'f'] = IN_DQ_UCODE3,
+        ['A' ... 'F'] = IN_DQ_UCODE3,
+    },
+    [IN_DQ_UCODE1] = {
+        ['0' ... '9'] = IN_DQ_UCODE2,
+        ['a' ... 'f'] = IN_DQ_UCODE2,
+        ['A' ... 'F'] = IN_DQ_UCODE2,
+    },
+    [IN_DQ_UCODE0] = {
+        ['0' ... '9'] = IN_DQ_UCODE1,
+        ['a' ... 'f'] = IN_DQ_UCODE1,
+        ['A' ... 'F'] = IN_DQ_UCODE1,
+    },
+    [IN_DQ_STRING_ESCAPE] = {
+        ['b'] = IN_DQ_STRING,
+        ['f'] = IN_DQ_STRING,
+        ['n'] = IN_DQ_STRING,
+        ['r'] = IN_DQ_STRING,
+        ['t'] = IN_DQ_STRING,
+        ['/'] = IN_DQ_STRING,
+        ['\\'] = IN_DQ_STRING,
+        ['\''] = IN_DQ_STRING,
+        ['\"'] = IN_DQ_STRING,
+        ['u'] = IN_DQ_UCODE0,
+    },
+    [IN_DQ_STRING] = {
+        [1 ... 0xBF] = IN_DQ_STRING,        /* byte 0 stays IN_ERROR */
+        [0xC2 ... 0xF4] = IN_DQ_STRING,     /* 0xC0, 0xC1 and 0xF5..0xFF stay IN_ERROR */
+        ['\\'] = IN_DQ_STRING_ESCAPE,
+        ['"'] = JSON_STRING,
+    },
+
+    /* single quote string */
+    [IN_SQ_UCODE3] = {
+        ['0' ... '9'] = IN_SQ_STRING,
+        ['a' ... 'f'] = IN_SQ_STRING,
+        ['A' ... 'F'] = IN_SQ_STRING,
+    },
+    [IN_SQ_UCODE2] = {
+        ['0' ... '9'] = IN_SQ_UCODE3,
+        ['a' ... 'f'] = IN_SQ_UCODE3,
+        ['A' ... 'F'] = IN_SQ_UCODE3,
+    },
+    [IN_SQ_UCODE1] = {
+        ['0' ... '9'] = IN_SQ_UCODE2,
+        ['a' ... 'f'] = IN_SQ_UCODE2,
+        ['A' ... 'F'] = IN_SQ_UCODE2,
+    },
+    [IN_SQ_UCODE0] = {
+        ['0' ... '9'] = IN_SQ_UCODE1,
+        ['a' ... 'f'] = IN_SQ_UCODE1,
+        ['A' ... 'F'] = IN_SQ_UCODE1,
+    },
+    [IN_SQ_STRING_ESCAPE] = {
+        ['b'] = IN_SQ_STRING,
+        ['f'] = IN_SQ_STRING,
+        ['n'] = IN_SQ_STRING,
+        ['r'] = IN_SQ_STRING,
+        ['t'] = IN_SQ_STRING,
+        ['/'] = IN_SQ_STRING,               /* must return to the single-quoted state, or the string would end at '"' */
+        ['\\'] = IN_SQ_STRING,              /* likewise: an escaped backslash keeps us in the single-quoted string */
+        ['\''] = IN_SQ_STRING,
+        ['\"'] = IN_SQ_STRING,
+        ['u'] = IN_SQ_UCODE0,
+    },
+    [IN_SQ_STRING] = {
+        [1 ... 0xBF] = IN_SQ_STRING,        /* byte 0 stays IN_ERROR */
+        [0xC2 ... 0xF4] = IN_SQ_STRING,     /* 0xC0, 0xC1 and 0xF5..0xFF stay IN_ERROR */
+        ['\\'] = IN_SQ_STRING_ESCAPE,
+        ['\''] = JSON_STRING,
+    },
+
+    /* Zero */
+    [IN_ZERO] = {
+        TERMINAL(JSON_INTEGER),
+        ['0' ... '9'] = IN_ERROR,           /* a digit directly after a leading zero is rejected */
+        ['.'] = IN_MANTISSA,
+    },
+
+    /* Float */
+    [IN_DIGITS] = {
+        TERMINAL(JSON_FLOAT),
+        ['0' ... '9'] = IN_DIGITS,
+    },
+
+    [IN_DIGIT] = {
+        ['0' ... '9'] = IN_DIGITS,
+    },
+
+    [IN_EXP_E] = {
+        ['-'] = IN_DIGIT,
+        ['+'] = IN_DIGIT,
+        ['0' ... '9'] = IN_DIGITS,
+    },
+
+    [IN_MANTISSA_DIGITS] = {
+        TERMINAL(JSON_FLOAT),
+        ['0' ... '9'] = IN_MANTISSA_DIGITS,
+        ['e'] = IN_EXP_E,
+        ['E'] = IN_EXP_E,
+    },
+
+    [IN_MANTISSA] = {
+        ['0' ... '9'] = IN_MANTISSA_DIGITS,
+    },
+
+    /* Number */
+    [IN_NONZERO_NUMBER] = {
+        TERMINAL(JSON_INTEGER),
+        ['0' ... '9'] = IN_NONZERO_NUMBER,
+        ['e'] = IN_EXP_E,
+        ['E'] = IN_EXP_E,
+        ['.'] = IN_MANTISSA,
+    },
+
+    [IN_NEG_NONZERO_NUMBER] = {
+        ['0'] = IN_ZERO,
+        ['1' ... '9'] = IN_NONZERO_NUMBER,
+    },
+
+    /* keywords */
+    [IN_KEYWORD] = {
+        TERMINAL(JSON_KEYWORD),
+        ['a' ... 'z'] = IN_KEYWORD,
+    },
+
+    /* whitespace */
+    [IN_WHITESPACE] = {
+        TERMINAL(JSON_SKIP),
+        [' '] = IN_WHITESPACE,
+        ['\t'] = IN_WHITESPACE,
+        ['\r'] = IN_WHITESPACE,
+        ['\n'] = IN_WHITESPACE,
+    },
+
+    /* escape */
+    [IN_ESCAPE_LL] = {
+        ['d'] = JSON_ESCAPE,
+    },
+
+    [IN_ESCAPE_L] = {
+        ['d'] = JSON_ESCAPE,
+        ['l'] = IN_ESCAPE_LL,
+    },
+
+    [IN_ESCAPE_I64] = {
+        ['d'] = JSON_ESCAPE,
+    },
+
+    [IN_ESCAPE_I6] = {
+        ['4'] = IN_ESCAPE_I64,
+    },
+
+    [IN_ESCAPE_I] = {
+        ['6'] = IN_ESCAPE_I6,
+    },
+
+    [IN_ESCAPE] = {
+        ['d'] = JSON_ESCAPE,
+        ['i'] = JSON_ESCAPE,
+        ['p'] = JSON_ESCAPE,
+        ['s'] = JSON_ESCAPE,
+        ['f'] = JSON_ESCAPE,
+        ['l'] = IN_ESCAPE_L,
+        ['I'] = IN_ESCAPE_I,
+    },
+
+    /* top level rule */
+    [IN_START] = {
+        ['"'] = IN_DQ_STRING,
+        ['\''] = IN_SQ_STRING,
+        ['0'] = IN_ZERO,
+        ['1' ... '9'] = IN_NONZERO_NUMBER,
+        ['-'] = IN_NEG_NONZERO_NUMBER,
+        ['{'] = JSON_OPERATOR,
+        ['}'] = JSON_OPERATOR,
+        ['['] = JSON_OPERATOR,
+        [']'] = JSON_OPERATOR,
+        [','] = JSON_OPERATOR,
+        [':'] = JSON_OPERATOR,
+        ['a' ... 'z'] = IN_KEYWORD,
+        ['%'] = IN_ESCAPE,
+        [' '] = IN_WHITESPACE,
+        ['\t'] = IN_WHITESPACE,
+        ['\r'] = IN_WHITESPACE,
+        ['\n'] = IN_WHITESPACE,
+    },
+};
+
+void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
+{
+    lexer->x = lexer->y = 0;        /* position counters start at zero */
+    lexer->state = IN_START;
+    lexer->token = qstring_new();   /* empty buffer for the token being collected */
+    lexer->emit = func;             /* callback invoked from json_lexer_feed_char() */
+}
+
+static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush) /* run one byte through the table; every path here returns 0 */
+{
+ int char_consumed, new_state;
+
+ lexer->x++; /* x counts bytes since the last '\n', y counts '\n' seen; both are passed to emit() */
+ if (ch == '\n') {
+ lexer->x = 0;
+ lexer->y++;
+ }
+
+ do {
+ new_state = json_lexer[lexer->state][(uint8_t)ch]; /* cast: index 0..255 even where char is signed */
+ char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
+ if (char_consumed) {
+ qstring_append_chr(lexer->token, ch);
+ }
+
+ switch (new_state) {
+ case JSON_OPERATOR:
+ case JSON_ESCAPE:
+ case JSON_INTEGER:
+ case JSON_FLOAT:
+ case JSON_KEYWORD:
+ case JSON_STRING:
+ lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
+ /* fall through */
+ case JSON_SKIP: /* skipped tokens are discarded without calling emit() */
+ QDECREF(lexer->token);
+ lexer->token = qstring_new();
+ new_state = IN_START;
+ break;
+ case IN_ERROR:
+ /* XXX: To avoid having previous bad input leaving the parser in an
+ * unresponsive state where we consume unpredictable amounts of
+ * subsequent "good" input, percolate this error state up to the
+ * tokenizer/parser by forcing a NULL object to be emitted, then
+ * reset state.
+ *
+ * Also note that this handling is required for reliable channel
+ * negotiation between QMP and the guest agent, since chr(0xFF)
+ * is placed at the beginning of certain events to ensure proper
+ * delivery when the channel is in an unknown state. chr(0xFF) is
+ * never a valid ASCII/UTF-8 sequence, so this should reliably
+ * induce an error/flush state.
+ */
+ lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
+ QDECREF(lexer->token);
+ lexer->token = qstring_new();
+ new_state = IN_START;
+ lexer->state = new_state;
+ return 0; /* the offending byte is dropped; the lookahead loop is not continued */
+ default:
+ break;
+ }
+ lexer->state = new_state;
+ } while (!char_consumed && !flush); /* an unconsumed lookahead byte is run again from the new state, unless flushing */
+
+ /* Do not let a single token grow to an arbitrarily large size,
+ * this is a security consideration.
+ */
+ if (lexer->token->length > MAX_TOKEN_SIZE) {
+ lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y); /* NOTE(review): passes the lexer state as token type -- confirm emit() expects that */
+ QDECREF(lexer->token);
+ lexer->token = qstring_new();
+ lexer->state = IN_START;
+ }
+
+ return 0;
+}
+
+int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
+{
+    const char *cursor = buffer;    /* next unread byte */
+    size_t remaining = size;        /* bytes still to be fed */
+
+    /* Hand the bytes to the state machine in order; a negative status stops early. */
+    while (remaining > 0) {
+        int status = json_lexer_feed_char(lexer, *cursor, false);
+        if (status < 0) {
+            return status;
+        }
+        cursor++; remaining--;
+    }
+    return 0;
+}
+
+int json_lexer_flush(JSONLexer *lexer)
+{
+    return (lexer->state != IN_START) ? json_lexer_feed_char(lexer, 0, true) : 0;   /* no-op between tokens */
+}
+
+void json_lexer_destroy(JSONLexer *lexer) /* drops the lexer's reference to its token buffer; @lexer itself is not freed here */
+{
+ QDECREF(lexer->token);
+}
diff --git a/contrib/qemu/qobject/json-parser.c b/contrib/qemu/qobject/json-parser.c
new file mode 100644
index 000000000..e7947b340
--- /dev/null
+++ b/contrib/qemu/qobject/json-parser.c
@@ -0,0 +1,724 @@
+/*
+ * JSON Parser
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <stdarg.h>
+
+#include "qemu-common.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/json-parser.h"
+#include "qapi/qmp/json-lexer.h"
+#include "qapi/qmp/qerror.h"
+
+/* State shared by the parse_*() routines while one token list is parsed. */
+typedef struct JSONParserContext
+{
+ Error *err; /* last error recorded by parse_error(), NULL if none */
+ struct {
+ QObject **buf; /* array of token objects, filled by parser_context_new() */
+ size_t pos; /* index of the next token to peek at / pop */
+ size_t count; /* number of entries in buf */
+ } tokens;
+} JSONParserContext;
+
+/* BUG_ON(cond): assert that 'cond' does NOT hold. */
+#define BUG_ON(cond) assert(!(cond))
+
+/**
+ * TODO
+ *
+ * 0) make errors meaningful again
+ * 1) add geometry information to tokens
+ * 3) should we return a parsed size?
+ * 4) deal with premature EOI
+ */
+
+static QObject *parse_value(JSONParserContext *ctxt, va_list *ap);
+
+/**
+ * Token manipulators
+ *
+ * tokens are dictionaries that contain a type, a string value, and geometry information
+ * about a token identified by the lexer. These are routines that make working with
+ * these objects a bit easier.
+ */
+/* Return the string stored under the "token" key of a token dictionary. */
+static const char *token_get_value(QObject *obj)
+{
+    QDict *fields = qobject_to_qdict(obj);
+
+    return qdict_get_str(fields, "token");
+}
+
+/* Return the integer stored under the "type" key of a token dictionary. */
+static JSONTokenType token_get_type(QObject *obj)
+{
+    QDict *fields = qobject_to_qdict(obj);
+
+    return qdict_get_int(fields, "type");
+}
+
+/*
+ * Return non-zero when 'obj' is a JSON_OPERATOR token whose text is exactly
+ * the single character 'op'.
+ */
+static int token_is_operator(QObject *obj, char op)
+{
+    if (token_get_type(obj) == JSON_OPERATOR) {
+        const char *text = token_get_value(obj);
+
+        return text[0] == op && text[1] == '\0';
+    }
+
+    return 0;
+}
+
+/* Return non-zero when 'obj' is a JSON_KEYWORD token spelled 'value'. */
+static int token_is_keyword(QObject *obj, const char *value)
+{
+    /* The text is only looked at once the type is known to match. */
+    return token_get_type(obj) == JSON_KEYWORD &&
+           strcmp(token_get_value(obj), value) == 0;
+}
+
+/* Return non-zero when 'obj' is a JSON_ESCAPE token spelled 'value'. */
+static int token_is_escape(QObject *obj, const char *value)
+{
+    /* The text is only looked at once the type is known to match. */
+    return token_get_type(obj) == JSON_ESCAPE &&
+           strcmp(token_get_value(obj), value) == 0;
+}
+
+/**
+ * Error handler
+ */
+/*
+ * Record a parse error in ctxt->err.
+ *
+ * The printf-style 'msg' is formatted into a fixed 1024-byte buffer, any
+ * error already stored in the context is freed, and a new
+ * QERR_JSON_PARSE_ERROR carrying the formatted text is set.  'token' is
+ * accepted for the caller's convenience but is not used here.
+ */
+static void GCC_FMT_ATTR(3, 4) parse_error(JSONParserContext *ctxt,
+                                           QObject *token, const char *msg, ...)
+{
+    char text[1024];
+    va_list args;
+
+    va_start(args, msg);
+    vsnprintf(text, sizeof(text), msg, args);
+    va_end(args);
+
+    /* Only the most recent error is kept. */
+    if (ctxt->err != NULL) {
+        error_free(ctxt->err);
+        ctxt->err = NULL;
+    }
+
+    error_set(&ctxt->err, QERR_JSON_PARSE_ERROR, text);
+}
+
+/**
+ * String helpers
+ *
+ * These helpers are used to unescape strings.
+ */
+/*
+ * Encode the 16-bit code unit 'wchar' as a NUL-terminated UTF-8 sequence in
+ * 'buffer'.  One, two or three payload bytes are written depending on the
+ * value; BUG_ON() checks that 'buffer_length' has room for them plus the
+ * terminator.
+ */
+static void wchar_to_utf8(uint16_t wchar, char *buffer, size_t buffer_length)
+{
+    if (wchar <= 0x007F) {
+        /* 0xxxxxxx */
+        BUG_ON(buffer_length < 2);
+        buffer[0] = wchar & 0x7F;
+        buffer[1] = 0;
+        return;
+    }
+
+    if (wchar <= 0x07FF) {
+        /* 110xxxxx 10xxxxxx */
+        BUG_ON(buffer_length < 3);
+        buffer[0] = 0xC0 | ((wchar >> 6) & 0x1F);
+        buffer[1] = 0x80 | (wchar & 0x3F);
+        buffer[2] = 0;
+        return;
+    }
+
+    /* 1110xxxx 10xxxxxx 10xxxxxx */
+    BUG_ON(buffer_length < 4);
+    buffer[0] = 0xE0 | ((wchar >> 12) & 0x0F);
+    buffer[1] = 0x80 | ((wchar >> 6) & 0x3F);
+    buffer[2] = 0x80 | (wchar & 0x3F);
+    buffer[3] = 0;
+}
+
+/*
+ * Convert one hexadecimal digit character to its value (0..15).
+ * Returns -1 when 'ch' is not a hexadecimal digit.
+ */
+static int hex2decimal(char ch)
+{
+    int digit = -1;
+
+    if (ch >= '0' && ch <= '9') {
+        digit = ch - '0';
+    } else if (ch >= 'a' && ch <= 'f') {
+        digit = (ch - 'a') + 10;
+    } else if (ch >= 'A' && ch <= 'F') {
+        digit = (ch - 'A') + 10;
+    }
+
+    return digit;
+}
+
+/**
+ * parse_string(): Parse a json string and return a QObject
+ *
+ * string
+ * ""
+ * " chars "
+ * chars
+ * char
+ * char chars
+ * char
+ * any-Unicode-character-
+ * except-"-or-\-or-
+ * control-character
+ * \"
+ * \\
+ * \/
+ * \b
+ * \f
+ * \n
+ * \r
+ * \t
+ * \u four-hex-digits
+ */
+/*
+ * Build a QString from the quoted, escaped text of a string token.
+ *
+ * The first character of the token text selects the terminator: a double
+ * quote if the text starts with one, a single quote otherwise.  Characters
+ * are copied up to that terminator (or the end of the text), with backslash
+ * escapes decoded on the way.  On an invalid escape an error is recorded via
+ * parse_error() and NULL is returned.
+ */
+static QString *qstring_from_escaped_str(JSONParserContext *ctxt, QObject *token)
+{
+    const char *cursor = token_get_value(token);
+    const char quote = (*cursor == '"') ? '"' : '\'';
+    QString *result;
+
+    cursor++;                       /* step over the opening quote */
+
+    result = qstring_new();
+    while (*cursor && *cursor != quote) {
+        const char *plain = NULL;   /* replacement for a one-char escape */
+
+        if (*cursor != '\\') {
+            /* ordinary character: copy it through unchanged */
+            char one[2];
+
+            one[0] = *cursor++;
+            one[1] = 0;
+            qstring_append(result, one);
+            continue;
+        }
+
+        cursor++;                   /* step over the backslash */
+
+        switch (*cursor) {
+        case '"':
+            plain = "\"";
+            break;
+        case '\'':
+            plain = "'";
+            break;
+        case '\\':
+            plain = "\\";
+            break;
+        case '/':
+            plain = "/";
+            break;
+        case 'b':
+            plain = "\b";
+            break;
+        case 'f':
+            plain = "\f";
+            break;
+        case 'n':
+            plain = "\n";
+            break;
+        case 'r':
+            plain = "\r";
+            break;
+        case 't':
+            plain = "\t";
+            break;
+        case 'u': {
+            uint16_t code_unit = 0;
+            char encoded[4];
+            int shift;
+
+            cursor++;               /* step over the 'u' */
+
+            /* four hex digits, most significant nibble first */
+            for (shift = 12; shift >= 0; shift -= 4) {
+                if (!qemu_isxdigit(*cursor)) {
+                    parse_error(ctxt, token,
+                                "invalid hex escape sequence in string");
+                    goto fail;
+                }
+                code_unit |= hex2decimal(*cursor) << shift;
+                cursor++;
+            }
+
+            wchar_to_utf8(code_unit, encoded, sizeof(encoded));
+            qstring_append(result, encoded);
+            break;
+        }
+        default:
+            parse_error(ctxt, token, "invalid escape sequence in string");
+            goto fail;
+        }
+
+        if (plain != NULL) {
+            qstring_append(result, plain);
+            cursor++;               /* step over the escape letter */
+        }
+    }
+
+    return result;
+
+fail:
+    QDECREF(result);
+    return NULL;
+}
+
+/*
+ * Return the token at the current position and advance past it.
+ *
+ * Returns NULL when every token has already been consumed.  The callers in
+ * this file test the result against NULL and report "premature EOI", so an
+ * exhausted token list is reported as a parse error rather than tripping an
+ * assertion (or reading past the end of the array when assertions are
+ * compiled out).
+ *
+ * The token's reference count is not changed; the context keeps ownership.
+ */
+static QObject *parser_context_pop_token(JSONParserContext *ctxt)
+{
+    if (ctxt->tokens.pos >= ctxt->tokens.count) {
+        return NULL;
+    }
+
+    return ctxt->tokens.buf[ctxt->tokens.pos++];
+}
+
+/* Note: parser_context_{peek|pop}_token do not increment the
+ * token object's refcount. In both cases the references will continue
+ * to be tracked and cleaned up in parser_context_free(), so do not
+ * attempt to free the token object.
+ *
+ * parser_context_peek_token() returns the token at the current position
+ * without consuming it, or NULL when every token has already been consumed.
+ * The callers in this file test the result against NULL and report
+ * "premature EOI", so an exhausted token list is reported as a parse error
+ * rather than tripping an assertion (or reading past the end of the array
+ * when assertions are compiled out).
+ */
+static QObject *parser_context_peek_token(JSONParserContext *ctxt)
+{
+    if (ctxt->tokens.pos >= ctxt->tokens.count) {
+        return NULL;
+    }
+
+    return ctxt->tokens.buf[ctxt->tokens.pos];
+}
+
+/*
+ * Take a snapshot of the token cursor (buffer, position, count) so that a
+ * failed parse attempt can be undone with parser_context_restore().  The
+ * snapshot's 'err' field is left NULL.
+ */
+static JSONParserContext parser_context_save(JSONParserContext *ctxt)
+{
+    JSONParserContext snapshot = {
+        .err = NULL,
+        .tokens.buf = ctxt->tokens.buf,
+        .tokens.pos = ctxt->tokens.pos,
+        .tokens.count = ctxt->tokens.count,
+    };
+
+    return snapshot;
+}
+
+/*
+ * Put the token cursor back to a snapshot taken by parser_context_save().
+ * Only the 'tokens' member is copied; ctxt->err is left untouched.
+ */
+static void parser_context_restore(JSONParserContext *ctxt,
+                                   JSONParserContext saved_ctxt)
+{
+    ctxt->tokens = saved_ctxt.tokens;
+}
+
+/*
+ * qlist_iter() callback used by parser_context_new(): store 'obj' in the
+ * next free slot of the context's token array and take a reference on it.
+ * 'opaque' is the JSONParserContext being filled.
+ */
+static void tokens_append_from_iter(QObject *obj, void *opaque)
+{
+    JSONParserContext *ctxt = opaque;
+    size_t slot = ctxt->tokens.pos;
+
+    g_assert(slot < ctxt->tokens.count);
+    ctxt->tokens.buf[slot] = obj;
+    ctxt->tokens.pos = slot + 1;
+    qobject_incref(obj);
+}
+
+/*
+ * Allocate a parser context holding a flat array copy of 'tokens'.
+ *
+ * Returns NULL when 'tokens' is NULL or empty.  Each token copied into the
+ * array gets an extra reference (see tokens_append_from_iter()); release
+ * the context with parser_context_free().
+ */
+static JSONParserContext *parser_context_new(QList *tokens)
+{
+    JSONParserContext *ctxt;
+    size_t n = tokens ? qlist_size(tokens) : 0;
+
+    if (n == 0) {
+        return NULL;
+    }
+
+    /* g_malloc0() leaves err == NULL and tokens.pos == 0 */
+    ctxt = g_malloc0(sizeof(*ctxt));
+    ctxt->tokens.count = n;
+    ctxt->tokens.buf = g_malloc(n * sizeof(*ctxt->tokens.buf));
+
+    /* The iterator advances tokens.pos as it fills the array ... */
+    qlist_iter(tokens, tokens_append_from_iter, ctxt);
+    /* ... so rewind it before parsing starts. */
+    ctxt->tokens.pos = 0;
+
+    return ctxt;
+}
+
+/*
+ * Release a parser context: drop the reference held on every token in the
+ * array, then free the array and the context itself.  A NULL 'ctxt' is
+ * accepted and ignored.
+ *
+ * To support error propagation, ctxt->err must be freed separately.
+ */
+static void parser_context_free(JSONParserContext *ctxt)
+{
+    size_t i;   /* same type as tokens.count: no signed/unsigned compare */
+
+    if (ctxt == NULL) {
+        return;
+    }
+
+    for (i = 0; i < ctxt->tokens.count; i++) {
+        qobject_decref(ctxt->tokens.buf[i]);
+    }
+    g_free(ctxt->tokens.buf);
+    g_free(ctxt);
+}
+
+/**
+ * Parsing rules
+ */
+/*
+ * Parse one "key : value" member and store it in 'dict'.
+ *
+ * The key must parse to a QString.  Returns 0 on success.  On failure an
+ * error is recorded, the token cursor is put back where it was on entry,
+ * and -1 is returned.
+ */
+static int parse_pair(JSONParserContext *ctxt, QDict *dict, va_list *ap)
+{
+    JSONParserContext checkpoint = parser_context_save(ctxt);
+    QObject *key = NULL;
+    int rc = -1;
+
+    do {
+        QObject *first, *colon, *value;
+
+        first = parser_context_peek_token(ctxt);
+        if (first == NULL) {
+            parse_error(ctxt, NULL, "premature EOI");
+            break;
+        }
+
+        key = parse_value(ctxt, ap);
+        if (!key || qobject_type(key) != QTYPE_QSTRING) {
+            parse_error(ctxt, first, "key is not a string in object");
+            break;
+        }
+
+        colon = parser_context_pop_token(ctxt);
+        if (colon == NULL) {
+            parse_error(ctxt, NULL, "premature EOI");
+            break;
+        }
+        if (!token_is_operator(colon, ':')) {
+            parse_error(ctxt, colon, "missing : in object pair");
+            break;
+        }
+
+        value = parse_value(ctxt, ap);
+        if (value == NULL) {
+            parse_error(ctxt, colon, "Missing value in dict");
+            break;
+        }
+
+        qdict_put_obj(dict, qstring_get_str(qobject_to_qstring(key)), value);
+        rc = 0;
+    } while (0);
+
+    if (rc != 0) {
+        parser_context_restore(ctxt, checkpoint);
+    }
+    /* the key was only needed for its text */
+    qobject_decref(key);
+
+    return rc;
+}
+
+/*
+ * Parse "{ pair, pair, ... }" into a new QDict.
+ *
+ * Returns the dictionary as a QObject on success.  On failure the token
+ * cursor is put back where it was on entry and NULL is returned; no error
+ * is recorded when the first token simply is not '{'.
+ */
+static QObject *parse_object(JSONParserContext *ctxt, va_list *ap)
+{
+    JSONParserContext checkpoint = parser_context_save(ctxt);
+    QDict *members = NULL;
+    QObject *tok;
+
+    tok = parser_context_pop_token(ctxt);
+    if (tok == NULL || !token_is_operator(tok, '{')) {
+        goto fail;
+    }
+
+    members = qdict_new();
+
+    tok = parser_context_peek_token(ctxt);
+    if (tok == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        goto fail;
+    }
+
+    if (token_is_operator(tok, '}')) {
+        /* empty object: just consume the closing brace */
+        parser_context_pop_token(ctxt);
+        return QOBJECT(members);
+    }
+
+    for (;;) {
+        if (parse_pair(ctxt, members, ap) == -1) {
+            goto fail;
+        }
+
+        tok = parser_context_pop_token(ctxt);
+        if (tok == NULL) {
+            parse_error(ctxt, NULL, "premature EOI");
+            goto fail;
+        }
+
+        if (token_is_operator(tok, '}')) {
+            break;
+        }
+        if (!token_is_operator(tok, ',')) {
+            parse_error(ctxt, tok, "expected separator in dict");
+            goto fail;
+        }
+    }
+
+    return QOBJECT(members);
+
+fail:
+    parser_context_restore(ctxt, checkpoint);
+    QDECREF(members);
+    return NULL;
+}
+
+/*
+ * Parse "[ value, value, ... ]" into a new QList.
+ *
+ * Returns the list as a QObject on success.  On failure the token cursor is
+ * put back where it was on entry and NULL is returned; no error is recorded
+ * when the first token simply is not '['.
+ */
+static QObject *parse_array(JSONParserContext *ctxt, va_list *ap)
+{
+    JSONParserContext checkpoint = parser_context_save(ctxt);
+    QList *items = NULL;
+    QObject *tok;
+
+    tok = parser_context_pop_token(ctxt);
+    if (tok == NULL || !token_is_operator(tok, '[')) {
+        goto fail;
+    }
+
+    items = qlist_new();
+
+    tok = parser_context_peek_token(ctxt);
+    if (tok == NULL) {
+        parse_error(ctxt, NULL, "premature EOI");
+        goto fail;
+    }
+
+    if (token_is_operator(tok, ']')) {
+        /* empty array: just consume the closing bracket */
+        parser_context_pop_token(ctxt);
+        return QOBJECT(items);
+    }
+
+    for (;;) {
+        QObject *element = parse_value(ctxt, ap);
+
+        if (element == NULL) {
+            parse_error(ctxt, NULL, "expecting value");
+            goto fail;
+        }
+        qlist_append_obj(items, element);
+
+        tok = parser_context_pop_token(ctxt);
+        if (tok == NULL) {
+            parse_error(ctxt, NULL, "premature EOI");
+            goto fail;
+        }
+
+        if (token_is_operator(tok, ']')) {
+            break;
+        }
+        if (!token_is_operator(tok, ',')) {
+            parse_error(ctxt, tok, "expected separator in list");
+            goto fail;
+        }
+    }
+
+    return QOBJECT(items);
+
+fail:
+    parser_context_restore(ctxt, checkpoint);
+    QDECREF(items);
+    return NULL;
+}
+
+/*
+ * Parse the keywords "true" and "false" into a QBool.
+ *
+ * Any other keyword records an "invalid keyword" error.  On every failure
+ * the token cursor is put back where it was on entry and NULL is returned.
+ */
+static QObject *parse_keyword(JSONParserContext *ctxt)
+{
+    JSONParserContext checkpoint = parser_context_save(ctxt);
+    QObject *tok = parser_context_pop_token(ctxt);
+
+    if (tok == NULL || token_get_type(tok) != JSON_KEYWORD) {
+        goto rewind;
+    }
+
+    if (token_is_keyword(tok, "true")) {
+        return QOBJECT(qbool_from_int(true));
+    }
+    if (token_is_keyword(tok, "false")) {
+        return QOBJECT(qbool_from_int(false));
+    }
+
+    parse_error(ctxt, tok, "invalid keyword `%s'", token_get_value(tok));
+
+rewind:
+    parser_context_restore(ctxt, checkpoint);
+
+    return NULL;
+}
+
+/*
+ * Parse a printf-like escape token by taking the matching argument from
+ * '*ap' and wrapping it in a QObject ("%p" passes a QObject through as is).
+ *
+ * Fails when 'ap' is NULL, when no token is left, or when the token is not
+ * one of the recognised escapes; in those cases the token cursor is put
+ * back where it was on entry and NULL is returned.
+ */
+static QObject *parse_escape(JSONParserContext *ctxt, va_list *ap)
+{
+    JSONParserContext checkpoint = parser_context_save(ctxt);
+    QObject *tok;
+
+    if (ap == NULL) {
+        goto rewind;
+    }
+
+    tok = parser_context_pop_token(ctxt);
+    if (tok == NULL) {
+        goto rewind;
+    }
+
+    if (token_is_escape(tok, "%p")) {
+        return va_arg(*ap, QObject *);
+    }
+    if (token_is_escape(tok, "%i")) {
+        return QOBJECT(qbool_from_int(va_arg(*ap, int)));
+    }
+    if (token_is_escape(tok, "%d")) {
+        return QOBJECT(qint_from_int(va_arg(*ap, int)));
+    }
+    if (token_is_escape(tok, "%ld")) {
+        return QOBJECT(qint_from_int(va_arg(*ap, long)));
+    }
+    if (token_is_escape(tok, "%lld") || token_is_escape(tok, "%I64d")) {
+        return QOBJECT(qint_from_int(va_arg(*ap, long long)));
+    }
+    if (token_is_escape(tok, "%s")) {
+        return QOBJECT(qstring_from_str(va_arg(*ap, const char *)));
+    }
+    if (token_is_escape(tok, "%f")) {
+        return QOBJECT(qfloat_from_double(va_arg(*ap, double)));
+    }
+
+rewind:
+    parser_context_restore(ctxt, checkpoint);
+
+    return NULL;
+}
+
+/*
+ * Parse a string, integer or float token into a QString, QInt or QFloat.
+ *
+ * For any other token type (or when no token is left) the token cursor is
+ * put back where it was on entry and NULL is returned.
+ */
+static QObject *parse_literal(JSONParserContext *ctxt)
+{
+    JSONParserContext checkpoint = parser_context_save(ctxt);
+    JSONTokenType kind;
+    QObject *tok;
+
+    tok = parser_context_pop_token(ctxt);
+    if (tok == NULL) {
+        goto rewind;
+    }
+
+    kind = token_get_type(tok);
+
+    if (kind == JSON_STRING) {
+        return QOBJECT(qstring_from_escaped_str(ctxt, tok));
+    }
+
+    if (kind == JSON_INTEGER) {
+        /* A possibility exists that this is a whole-valued float where the
+         * fractional part was left out due to being 0 (.0). It's not a big
+         * deal to treat these as ints in the parser, so long as users of the
+         * resulting QObject know to expect a QInt in place of a QFloat in
+         * cases like these.
+         *
+         * However, in some cases these values will overflow/underflow a
+         * QInt/int64 container, thus we should assume these are to be handled
+         * as QFloats/doubles rather than silently changing their values.
+         *
+         * strtoll() indicates these instances by setting errno to ERANGE
+         */
+        int64_t value;
+
+        errno = 0; /* strtoll doesn't set errno on success */
+        value = strtoll(token_get_value(tok), NULL, 10);
+        if (errno != ERANGE) {
+            return QOBJECT(qint_from_int(value));
+        }
+        /* out of range: handled as a float below */
+    } else if (kind != JSON_FLOAT) {
+        goto rewind;
+    }
+
+    /* FIXME dependent on locale */
+    return QOBJECT(qfloat_from_double(strtod(token_get_value(tok), NULL)));
+
+rewind:
+    parser_context_restore(ctxt, checkpoint);
+
+    return NULL;
+}
+
+/*
+ * Parse one JSON value by trying each rule in turn: object, array, escape,
+ * keyword, literal.  The first rule that yields a non-NULL object wins;
+ * NULL is returned when none of them does.
+ */
+static QObject *parse_value(JSONParserContext *ctxt, va_list *ap)
+{
+    QObject *value;
+
+    value = parse_object(ctxt, ap);
+    if (value != NULL) {
+        return value;
+    }
+
+    value = parse_array(ctxt, ap);
+    if (value != NULL) {
+        return value;
+    }
+
+    value = parse_escape(ctxt, ap);
+    if (value != NULL) {
+        return value;
+    }
+
+    value = parse_keyword(ctxt);
+    if (value != NULL) {
+        return value;
+    }
+
+    return parse_literal(ctxt);
+}
+
+/*
+ * Parse a token list into a QObject without reporting error details:
+ * same as json_parser_parse_err() with a NULL 'errp'.
+ */
+QObject *json_parser_parse(QList *tokens, va_list *ap)
+{
+    Error **no_errp = NULL;
+
+    return json_parser_parse_err(tokens, ap, no_errp);
+}
+
+/*
+ * Parse a token list into a QObject.
+ *
+ * Returns NULL when no parser context can be built from 'tokens' (NULL or
+ * empty list) or when parsing fails.  Any error recorded while parsing is
+ * handed to 'errp' through error_propagate().
+ */
+QObject *json_parser_parse_err(QList *tokens, va_list *ap, Error **errp)
+{
+    QObject *result = NULL;
+    JSONParserContext *ctxt = parser_context_new(tokens);
+
+    if (ctxt != NULL) {
+        result = parse_value(ctxt, ap);
+        error_propagate(errp, ctxt->err);
+        parser_context_free(ctxt);
+    }
+
+    return result;
+}
diff --git a/contrib/qemu/qobject/json-streamer.c b/contrib/qemu/qobject/json-streamer.c
new file mode 100644
index 000000000..1b2f9b1d1
--- /dev/null
+++ b/contrib/qemu/qobject/json-streamer.c
@@ -0,0 +1,122 @@
+/*
+ * JSON streaming support
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu-common.h"
+#include "qapi/qmp/json-lexer.h"
+#include "qapi/qmp/json-streamer.h"
+
+/* Upper bound on the accumulated token length of one message (64 << 20 bytes). */
+#define MAX_TOKEN_SIZE (64ULL << 20)
+/* Upper bound on the brace count and on the bracket count of one message. */
+#define MAX_NESTING (1ULL << 10)
+
+/*
+ * Lexer callback: collect tokens into parser->tokens and hand the list to
+ * parser->emit() once a complete message has been seen.
+ *
+ * Every token is wrapped in a dictionary with the keys "type", "token", "x"
+ * and "y".  Brace and bracket operators update the nesting counters.  The
+ * list is emitted when both counters are back at zero or when either goes
+ * negative.  A JSON_ERROR token, or a message that exceeds MAX_TOKEN_SIZE or
+ * MAX_NESTING, is reported by emitting a NULL list.
+ */
+static void json_message_process_token(JSONLexer *lexer, QString *token, JSONTokenType type, int x, int y)
+{
+    JSONMessageParser *parser = container_of(lexer, JSONMessageParser, lexer);
+    QDict *dict;
+
+    if (type == JSON_OPERATOR) {
+        switch (qstring_get_str(token)[0]) {
+        case '{':
+            parser->brace_count++;
+            break;
+        case '}':
+            parser->brace_count--;
+            break;
+        case '[':
+            parser->bracket_count++;
+            break;
+        case ']':
+            parser->bracket_count--;
+            break;
+        default:
+            break;
+        }
+    }
+
+    dict = qdict_new();
+    qdict_put(dict, "type", qint_from_int(type));
+    QINCREF(token);
+    qdict_put(dict, "token", token);
+    qdict_put(dict, "x", qint_from_int(x));
+    qdict_put(dict, "y", qint_from_int(y));
+
+    parser->token_size += token->length;
+
+    qlist_append(parser->tokens, dict);
+
+    if (type == JSON_ERROR) {
+        goto out_emit_bad;
+    } else if (parser->brace_count < 0 ||
+        parser->bracket_count < 0 ||
+        (parser->brace_count == 0 &&
+         parser->bracket_count == 0)) {
+        goto out_emit;
+    } else if (parser->token_size > MAX_TOKEN_SIZE ||
+               parser->bracket_count > MAX_NESTING ||
+               parser->brace_count > MAX_NESTING) {
+        /* Security consideration, we limit total memory allocated per object
+         * and the maximum recursion depth that a message can force.
+         *
+         * The list collected so far is an incomplete message: its braces or
+         * brackets are still open.  Report it as an error instead of handing
+         * the truncated list to the parser, which would run off its end.
+         */
+        goto out_emit_bad;
+    }
+
+    return;
+
+out_emit_bad:
+    /* clear out token list and tell the parser to emit an error
+     * indication by passing it a NULL list
+     */
+    QDECREF(parser->tokens);
+    parser->tokens = NULL;
+out_emit:
+    /* send current list of tokens to parser and reset tokenizer */
+    parser->brace_count = 0;
+    parser->bracket_count = 0;
+    parser->emit(parser, parser->tokens);
+    if (parser->tokens) {
+        QDECREF(parser->tokens);
+    }
+    parser->tokens = qlist_new();
+    parser->token_size = 0;
+}
+
+/*
+ * Prepare 'parser' for use: remember 'func' as the emit callback, start
+ * with zeroed nesting counters and token size and an empty token list, and
+ * initialise the embedded lexer with json_message_process_token() as its
+ * callback.
+ */
+void json_message_parser_init(JSONMessageParser *parser,
+                              void (*func)(JSONMessageParser *, QList *))
+{
+    parser->tokens = qlist_new();
+    parser->token_size = 0;
+    parser->brace_count = 0;
+    parser->bracket_count = 0;
+    parser->emit = func;
+
+    json_lexer_init(&parser->lexer, json_message_process_token);
+}
+
+/* Feed 'size' bytes of input to the parser's lexer; returns its result. */
+int json_message_parser_feed(JSONMessageParser *parser,
+                             const char *buffer, size_t size)
+{
+    JSONLexer *lexer = &parser->lexer;
+
+    return json_lexer_feed(lexer, buffer, size);
+}
+
+/* Flush the parser's lexer; returns the result of json_lexer_flush(). */
+int json_message_parser_flush(JSONMessageParser *parser)
+{
+    JSONLexer *lexer = &parser->lexer;
+
+    return json_lexer_flush(lexer);
+}
+
+/* Tear down the embedded lexer and drop the reference to the token list. */
+void json_message_parser_destroy(JSONMessageParser *parser)
+{
+    QList *pending = parser->tokens;
+
+    json_lexer_destroy(&parser->lexer);
+    QDECREF(pending);
+}
diff --git a/contrib/qemu/qobject/qbool.c b/contrib/qemu/qobject/qbool.c
new file mode 100644
index 000000000..a3d2afa82
--- /dev/null
+++ b/contrib/qemu/qobject/qbool.c
@@ -0,0 +1,68 @@
+/*
+ * QBool Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu-common.h"
+
+static void qbool_destroy_obj(QObject *obj);
+
+/* Type descriptor installed in every QBool by qbool_from_int(). */
+static const QType qbool_type = {
+ .code = QTYPE_QBOOL,
+ .destroy = qbool_destroy_obj,
+};
+
+/**
+ * qbool_from_int(): Allocate a QBool holding 'value'
+ *
+ * Return strong reference.
+ */
+QBool *qbool_from_int(int value)
+{
+    QBool *boxed = g_malloc(sizeof(QBool));
+
+    QOBJECT_INIT(boxed, &qbool_type);
+    boxed->value = value;
+
+    return boxed;
+}
+
+/**
+ * qbool_get_int(): Return the int stored in 'qb'
+ */
+int qbool_get_int(const QBool *qb)
+{
+    int stored = qb->value;
+
+    return stored;
+}
+
+/**
+ * qobject_to_qbool(): Convert a QObject into a QBool
+ *
+ * Return NULL when the object's type code is not QTYPE_QBOOL.
+ */
+QBool *qobject_to_qbool(const QObject *obj)
+{
+    return qobject_type(obj) == QTYPE_QBOOL
+        ? container_of(obj, QBool, base)
+        : NULL;
+}
+
+/**
+ * qbool_destroy_obj(): Release the memory of a QBool object
+ *
+ * 'obj' must not be NULL.
+ */
+static void qbool_destroy_obj(QObject *obj)
+{
+    QBool *qb;
+
+    assert(obj != NULL);
+    qb = qobject_to_qbool(obj);
+    g_free(qb);
+}
diff --git a/contrib/qemu/qobject/qdict.c b/contrib/qemu/qobject/qdict.c
new file mode 100644
index 000000000..ed381f9a5
--- /dev/null
+++ b/contrib/qemu/qobject/qdict.c
@@ -0,0 +1,478 @@
+/*
+ * QDict Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu/queue.h"
+#include "qemu-common.h"
+
+static void qdict_destroy_obj(QObject *obj);
+
+/* Type descriptor installed in every QDict by qdict_new(). */
+static const QType qdict_type = {
+ .code = QTYPE_QDICT,
+ .destroy = qdict_destroy_obj,
+};
+
+/**
+ * qdict_new(): Allocate an empty, zero-initialised QDict
+ *
+ * Return strong reference.
+ */
+QDict *qdict_new(void)
+{
+    QDict *fresh = g_malloc0(sizeof(QDict));
+
+    QOBJECT_INIT(fresh, &qdict_type);
+
+    return fresh;
+}
+
+/**
+ * qobject_to_qdict(): Convert a QObject into a QDict
+ *
+ * Return NULL when the object's type code is not QTYPE_QDICT.
+ */
+QDict *qobject_to_qdict(const QObject *obj)
+{
+    return qobject_type(obj) == QTYPE_QDICT
+        ? container_of(obj, QDict, base)
+        : NULL;
+}
+
+/**
+ * tdb_hash(): based on the hash algorithm from gdbm, via tdb
+ * (from module-init-tools)
+ *
+ * The value is seeded from the key length, each byte of the key is then
+ * added in shifted left by (index * 5) mod 24 bits, and the sum is
+ * scrambled with a final multiply-and-add.
+ */
+static unsigned int tdb_hash(const char *name)
+{
+    const unsigned char *bytes = (const unsigned char *)name;
+    unsigned value = 0x238F13AF * strlen(name);  /* seed from the key size */
+    unsigned i;
+
+    for (i = 0; name[i]; i++) {
+        value += bytes[i] << (i * 5 % 24);
+    }
+
+    return 1103515243 * value + 12345;
+}
+
+/**
+ * alloc_entry(): allocate a new QDictEntry
+ *
+ * The entry gets its own copy of 'key' and stores the 'value' pointer as is.
+ */
+static QDictEntry *alloc_entry(const char *key, QObject *value)
+{
+    QDictEntry *fresh = g_malloc0(sizeof(QDictEntry));
+
+    fresh->value = value;
+    fresh->key = g_strdup(key);
+
+    return fresh;
+}
+
+/**
+ * qdict_entry_value(): Return the value stored in a qdict entry
+ *
+ * Return weak reference.
+ */
+QObject *qdict_entry_value(const QDictEntry *entry)
+{
+    QObject *stored = entry->value;
+
+    return stored;
+}
+
+/**
+ * qdict_entry_key(): Return the key of a qdict entry
+ *
+ * The result is a *pointer* to the entry's own string; duplicate it before
+ * storing it anywhere.
+ */
+const char *qdict_entry_key(const QDictEntry *entry)
+{
+    const char *name = entry->key;
+
+    return name;
+}
+
+/**
+ * qdict_find(): Look 'key' up in one bucket's list
+ *
+ * Return the matching entry, or NULL when the bucket has no entry with
+ * that key.
+ */
+static QDictEntry *qdict_find(const QDict *qdict,
+                              const char *key, unsigned int bucket)
+{
+    QDictEntry *cur;
+
+    for (cur = QLIST_FIRST(&qdict->table[bucket]); cur != NULL;
+         cur = QLIST_NEXT(cur, next)) {
+        if (strcmp(cur->key, key) == 0) {
+            return cur;
+        }
+    }
+
+    return NULL;
+}
+
+/**
+ * qdict_put_obj(): Put a new QObject into the dictionary
+ *
+ * Insert the pair 'key:value' into 'qdict'.  If 'key' is already present,
+ * the reference to the old value is dropped and the new value is stored in
+ * the existing entry; otherwise a new entry is added at the head of the
+ * key's bucket and the dictionary size grows by one.
+ *
+ * NOTE: ownership of 'value' is transferred to the QDict
+ */
+void qdict_put_obj(QDict *qdict, const char *key, QObject *value)
+{
+    unsigned int bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+    QDictEntry *existing = qdict_find(qdict, key, bucket);
+
+    if (existing == NULL) {
+        QDictEntry *fresh = alloc_entry(key, value);
+
+        QLIST_INSERT_HEAD(&qdict->table[bucket], fresh, next);
+        qdict->size++;
+        return;
+    }
+
+    /* replace key's value */
+    qobject_decref(existing->value);
+    existing->value = value;
+}
+
+/**
+ * qdict_get(): Look up a given 'key'
+ *
+ * Return a weak reference to the QObject associated with 'key' if
+ * 'key' is present in the dictionary, NULL otherwise.
+ */
+QObject *qdict_get(const QDict *qdict, const char *key)
+{
+    unsigned int bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+    QDictEntry *hit = qdict_find(qdict, key, bucket);
+
+    if (hit == NULL) {
+        return NULL;
+    }
+
+    return hit->value;
+}
+
+/**
+ * qdict_haskey(): Check if 'key' exists
+ *
+ * Return 1 if 'key' exists in the dict, 0 otherwise
+ */
+int qdict_haskey(const QDict *qdict, const char *key)
+{
+    const unsigned int bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+
+    return qdict_find(qdict, key, bucket) != NULL;
+}
+
+/**
+ * qdict_size(): Return the number of entries in the dictionary
+ */
+size_t qdict_size(const QDict *qdict)
+{
+    size_t entries = qdict->size;
+
+    return entries;
+}
+
+/**
+ * qdict_get_obj(): Get a QObject of a specific type
+ *
+ * Asserts that 'key' is present and that the stored object has type
+ * code 'type'.
+ */
+static QObject *qdict_get_obj(const QDict *qdict, const char *key,
+                              qtype_code type)
+{
+    QObject *found = qdict_get(qdict, key);
+
+    assert(found != NULL);
+    assert(qobject_type(found) == type);
+
+    return found;
+}
+
+/**
+ * qdict_get_double(): Get a number mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QFloat or QInt object; it asserts on a missing key and aborts on any
+ * other stored type.
+ *
+ * Return number mapped by 'key'.
+ */
+double qdict_get_double(const QDict *qdict, const char *key)
+{
+    QObject *obj = qdict_get(qdict, key);
+    qtype_code kind;
+
+    assert(obj);
+    kind = qobject_type(obj);
+
+    if (kind == QTYPE_QFLOAT) {
+        return qfloat_get_double(qobject_to_qfloat(obj));
+    }
+    if (kind == QTYPE_QINT) {
+        return qint_get_int(qobject_to_qint(obj));
+    }
+
+    abort();
+}
+
+/**
+ * qdict_get_int(): Get an integer mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QInt object.
+ *
+ * Return integer mapped by 'key'.
+ */
+int64_t qdict_get_int(const QDict *qdict, const char *key)
+{
+    QInt *boxed = qobject_to_qint(qdict_get_obj(qdict, key, QTYPE_QINT));
+
+    return qint_get_int(boxed);
+}
+
+/**
+ * qdict_get_bool(): Get a bool mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QBool object.
+ *
+ * Return bool mapped by 'key'.
+ */
+int qdict_get_bool(const QDict *qdict, const char *key)
+{
+    QBool *boxed = qobject_to_qbool(qdict_get_obj(qdict, key, QTYPE_QBOOL));
+
+    return qbool_get_int(boxed);
+}
+
+/**
+ * qdict_get_qlist(): Get the QList mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QList object.
+ *
+ * Return QList mapped by 'key'.
+ */
+QList *qdict_get_qlist(const QDict *qdict, const char *key)
+{
+    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QLIST);
+
+    return qobject_to_qlist(obj);
+}
+
+/**
+ * qdict_get_qdict(): Get the QDict mapped by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QDict object.
+ *
+ * Return QDict mapped by 'key'.
+ */
+QDict *qdict_get_qdict(const QDict *qdict, const char *key)
+{
+    QObject *obj = qdict_get_obj(qdict, key, QTYPE_QDICT);
+
+    return qobject_to_qdict(obj);
+}
+
+/**
+ * qdict_get_str(): Get a pointer to the stored string mapped
+ * by 'key'
+ *
+ * This function assumes that 'key' exists and it stores a
+ * QString object.
+ *
+ * Return pointer to the string mapped by 'key'.
+ */
+const char *qdict_get_str(const QDict *qdict, const char *key)
+{
+    QString *boxed = qobject_to_qstring(qdict_get_obj(qdict, key,
+                                                      QTYPE_QSTRING));
+
+    return qstring_get_str(boxed);
+}
+
+/**
+ * qdict_get_try_int(): Try to get integer mapped by 'key'
+ *
+ * Return integer mapped by 'key', if it is not present in
+ * the dictionary or if the stored object is not of QInt type
+ * 'def_value' will be returned.
+ */
+int64_t qdict_get_try_int(const QDict *qdict, const char *key,
+                          int64_t def_value)
+{
+    QObject *obj = qdict_get(qdict, key);
+
+    if (obj && qobject_type(obj) == QTYPE_QINT) {
+        return qint_get_int(qobject_to_qint(obj));
+    }
+
+    return def_value;
+}
+
+/**
+ * qdict_get_try_bool(): Try to get a bool mapped by 'key'
+ *
+ * Return bool mapped by 'key', if it is not present in the
+ * dictionary or if the stored object is not of QBool type
+ * 'def_value' will be returned.
+ */
+int qdict_get_try_bool(const QDict *qdict, const char *key, int def_value)
+{
+    QObject *obj = qdict_get(qdict, key);
+
+    if (obj && qobject_type(obj) == QTYPE_QBOOL) {
+        return qbool_get_int(qobject_to_qbool(obj));
+    }
+
+    return def_value;
+}
+
+/**
+ * qdict_get_try_str(): Try to get a pointer to the stored string
+ * mapped by 'key'
+ *
+ * Return a pointer to the string mapped by 'key', if it is not present
+ * in the dictionary or if the stored object is not of QString type
+ * NULL will be returned.
+ */
+const char *qdict_get_try_str(const QDict *qdict, const char *key)
+{
+    QObject *obj = qdict_get(qdict, key);
+
+    if (obj && qobject_type(obj) == QTYPE_QSTRING) {
+        return qstring_get_str(qobject_to_qstring(obj));
+    }
+
+    return NULL;
+}
+
+/**
+ * qdict_iter(): Iterate over all the dictionary's stored values.
+ *
+ * 'iter' is called once for every entry, bucket by bucket, with the
+ * entry's key, its value and the caller's 'opaque' pointer.
+ */
+void qdict_iter(const QDict *qdict,
+                void (*iter)(const char *key, QObject *obj, void *opaque),
+                void *opaque)
+{
+    int bucket;
+
+    for (bucket = 0; bucket < QDICT_BUCKET_MAX; bucket++) {
+        QDictEntry *cur;
+
+        for (cur = QLIST_FIRST(&qdict->table[bucket]); cur != NULL;
+             cur = QLIST_NEXT(cur, next)) {
+            iter(cur->key, cur->value, opaque);
+        }
+    }
+}
+
+/*
+ * Return the first entry of the first non-empty bucket at or after
+ * 'first_bucket', or NULL when all of those buckets are empty.
+ */
+static QDictEntry *qdict_next_entry(const QDict *qdict, int first_bucket)
+{
+    int bucket = first_bucket;
+
+    while (bucket < QDICT_BUCKET_MAX) {
+        if (!QLIST_EMPTY(&qdict->table[bucket])) {
+            return QLIST_FIRST(&qdict->table[bucket]);
+        }
+        bucket++;
+    }
+
+    return NULL;
+}
+
+/**
+ * qdict_first(): Return first qdict entry for iteration.
+ *
+ * The search starts at bucket 0.
+ */
+const QDictEntry *qdict_first(const QDict *qdict)
+{
+    const int start_bucket = 0;
+
+    return qdict_next_entry(qdict, start_bucket);
+}
+
+/**
+ * qdict_next(): Return next qdict entry in an iteration.
+ *
+ * The successor is the next entry in the same bucket list if there is one,
+ * otherwise the first entry of the next non-empty bucket, otherwise NULL.
+ */
+const QDictEntry *qdict_next(const QDict *qdict, const QDictEntry *entry)
+{
+    QDictEntry *sibling = QLIST_NEXT(entry, next);
+    unsigned int bucket;
+
+    if (sibling) {
+        return sibling;
+    }
+
+    /* end of this bucket: continue with the buckets after it */
+    bucket = tdb_hash(entry->key) % QDICT_BUCKET_MAX;
+    return qdict_next_entry(qdict, bucket + 1);
+}
+
+/**
+ * qdict_clone_shallow(): Clones a given QDict. Its entries are not copied, but
+ * another reference is added.
+ */
+QDict *qdict_clone_shallow(const QDict *src)
+{
+    QDict *copy = qdict_new();
+    int bucket;
+
+    for (bucket = 0; bucket < QDICT_BUCKET_MAX; bucket++) {
+        QDictEntry *cur;
+
+        for (cur = QLIST_FIRST(&src->table[bucket]); cur != NULL;
+             cur = QLIST_NEXT(cur, next)) {
+            /* the copy holds its own reference to the shared value */
+            qobject_incref(cur->value);
+            qdict_put_obj(copy, cur->key, cur->value);
+        }
+    }
+
+    return copy;
+}
+
+/**
+ * qentry_destroy(): Free all the memory allocated by a QDictEntry
+ *
+ * The entry, its key and its value must all be non-NULL.  The reference to
+ * the value is dropped, then the key copy and the entry are freed.
+ */
+static void qentry_destroy(QDictEntry *e)
+{
+    assert(e != NULL);
+    assert(e->key != NULL);
+    assert(e->value != NULL);
+
+    qobject_decref(e->value);
+
+    g_free(e->key);
+    g_free(e);
+}
+
+/**
+ * qdict_del(): Delete a 'key:value' pair from the dictionary
+ *
+ * This will destroy all data allocated by this entry.  Nothing happens when
+ * 'key' is not in the dictionary.
+ */
+void qdict_del(QDict *qdict, const char *key)
+{
+    unsigned int bucket = tdb_hash(key) % QDICT_BUCKET_MAX;
+    QDictEntry *victim = qdict_find(qdict, key, bucket);
+
+    if (victim == NULL) {
+        return;
+    }
+
+    QLIST_REMOVE(victim, next);
+    qentry_destroy(victim);
+    qdict->size--;
+}
+
+/**
+ * qdict_destroy_obj(): Free all the memory allocated by a QDict
+ *
+ * Every entry of every bucket is unlinked and destroyed, then the
+ * dictionary itself is freed.  'obj' must not be NULL.
+ */
+static void qdict_destroy_obj(QObject *obj)
+{
+    QDict *qdict;
+    int bucket;
+
+    assert(obj != NULL);
+    qdict = qobject_to_qdict(obj);
+
+    for (bucket = 0; bucket < QDICT_BUCKET_MAX; bucket++) {
+        /* keep removing the head until the bucket is empty */
+        for (;;) {
+            QDictEntry *head = QLIST_FIRST(&qdict->table[bucket]);
+
+            if (head == NULL) {
+                break;
+            }
+            QLIST_REMOVE(head, next);
+            qentry_destroy(head);
+        }
+    }
+
+    g_free(qdict);
+}
diff --git a/contrib/qemu/qobject/qerror.c b/contrib/qemu/qobject/qerror.c
new file mode 100644
index 000000000..3aee1cf6a
--- /dev/null
+++ b/contrib/qemu/qobject/qerror.c
@@ -0,0 +1,156 @@
+/*
+ * QError Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "monitor/monitor.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu-common.h"
+
+static void qerror_destroy_obj(QObject *obj);
+
+/* Type descriptor for QError objects: type code plus destroy callback. */
+static const QType qerror_type = {
+ .code = QTYPE_QERROR,
+ .destroy = qerror_destroy_obj,
+};
+
+/**
+ * qerror_new(): Allocate a zero-filled QError and set up its QObject base
+ *
+ * Return strong reference.
+ */
+static QError *qerror_new(void)
+{
+    QError *fresh = g_malloc0(sizeof(*fresh));
+
+    QOBJECT_INIT(fresh, &qerror_type);
+    return fresh;
+}
+
+/**
+ * qerror_from_info(): Build a QError from an error class and a
+ * printf-style format with its argument list
+ *
+ * The current location is saved into the new object and the message is
+ * formatted into a freshly allocated string.
+ *
+ * Return strong reference.
+ */
+static QError *qerror_from_info(ErrorClass err_class, const char *fmt,
+                                va_list *va)
+{
+    QError *qerr = qerror_new();
+
+    loc_save(&qerr->loc);
+    qerr->err_class = err_class;
+    qerr->err_msg = g_strdup_vprintf(fmt, *va);
+
+    return qerr;
+}
+
+/**
+ * qerror_human(): Format QError data into human-readable string.
+ *
+ * Returns a new QString built from the error's 'err_msg' member; the caller
+ * is expected to drop it when done (qerror_print() does so with QDECREF()).
+ */
+QString *qerror_human(const QError *qerror)
+{
+ return qstring_from_str(qerror->err_msg);
+}
+
+/**
+ * qerror_print(): Print QError data
+ *
+ * Prints the text produced by qerror_human() (the 'err_msg' member of the
+ * QError) through error_report(), so that the output is routed to the right
+ * place (ie. stderr or Monitor's device). The location stored in the QError
+ * is pushed for the duration of the report and popped afterwards.
+ */
+static void qerror_print(QError *qerror)
+{
+    QString *text = qerror_human(qerror);
+
+    loc_push_restore(&qerror->loc);
+    error_report("%s", qstring_get_str(text));
+    loc_pop(&qerror->loc);
+
+    QDECREF(text);
+}
+
+/**
+ * qerror_report(): Build a QError from a printf-style message and report it
+ *
+ * When monitor_cur_is_qmp() is true the error is handed over to
+ * monitor_set_error(); otherwise it is printed and the reference dropped.
+ */
+void qerror_report(ErrorClass eclass, const char *fmt, ...)
+{
+    QError *qerror;
+    va_list args;
+
+    va_start(args, fmt);
+    qerror = qerror_from_info(eclass, fmt, &args);
+    va_end(args);
+
+    if (monitor_cur_is_qmp()) {
+        monitor_set_error(cur_mon, qerror);
+        return;
+    }
+
+    qerror_print(qerror);
+    QDECREF(qerror);
+}
+
+/*
+ * Evil... a local definition of struct Error, so that this file can read
+ * the 'msg' and 'err_class' members directly (see qerror_report_err()).
+ * NOTE(review): presumably this has to stay in sync with the real
+ * definition elsewhere in the tree -- confirm.
+ */
+struct Error
+{
+ char *msg;
+ ErrorClass err_class;
+};
+
+/**
+ * qerror_report_err(): Report an existing Error object as a QError
+ *
+ * Copies the message and class out of @err into a new QError, then either
+ * hands it to monitor_set_error() (when monitor_cur_is_qmp() is true) or
+ * prints it and drops the reference. @err itself is not freed here.
+ */
+void qerror_report_err(Error *err)
+{
+    QError *report = qerror_new();
+
+    loc_save(&report->loc);
+    report->err_msg = g_strdup(err->msg);
+    report->err_class = err->err_class;
+
+    if (monitor_cur_is_qmp()) {
+        monitor_set_error(cur_mon, report);
+        return;
+    }
+
+    qerror_print(report);
+    QDECREF(report);
+}
+
+/**
+ * assert_no_error(): Abort the process if @err is set
+ *
+ * A non-NULL @err is reported with qerror_report_err() before abort() is
+ * called; a NULL @err makes this a no-op.
+ */
+void assert_no_error(Error *err)
+{
+    if (!err) {
+        return;
+    }
+
+    qerror_report_err(err);
+    abort();
+}
+
+/**
+ * qobject_to_qerror(): Convert a QObject into a QError
+ *
+ * Returns NULL when @obj is not of type QTYPE_QERROR.
+ */
+static QError *qobject_to_qerror(const QObject *obj)
+{
+    if (qobject_type(obj) == QTYPE_QERROR) {
+        return container_of(obj, QError, base);
+    }
+
+    return NULL;
+}
+
+/**
+ * qerror_destroy_obj(): Destroy callback for QError objects
+ *
+ * Frees the message string and then the QError itself. @obj must not be
+ * NULL.
+ */
+static void qerror_destroy_obj(QObject *obj)
+{
+    QError *doomed;
+
+    assert(obj != NULL);
+    doomed = qobject_to_qerror(obj);
+
+    g_free(doomed->err_msg);
+    g_free(doomed);
+}
diff --git a/contrib/qemu/qobject/qfloat.c b/contrib/qemu/qobject/qfloat.c
new file mode 100644
index 000000000..7de0992db
--- /dev/null
+++ b/contrib/qemu/qobject/qfloat.c
@@ -0,0 +1,68 @@
+/*
+ * QFloat Module
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu-common.h"
+
+static void qfloat_destroy_obj(QObject *obj);
+
+/* Type descriptor for QFloat objects: type code plus destroy callback. */
+static const QType qfloat_type = {
+ .code = QTYPE_QFLOAT,
+ .destroy = qfloat_destroy_obj,
+};
+
+/**
+ * qfloat_from_double(): Create a new QFloat from a double
+ *
+ * Return strong reference.
+ */
+QFloat *qfloat_from_double(double value)
+{
+    QFloat *boxed = g_malloc(sizeof(*boxed));
+
+    boxed->value = value;
+    QOBJECT_INIT(boxed, &qfloat_type);
+
+    return boxed;
+}
+
+/**
+ * qfloat_get_double(): Get the stored double
+ *
+ * Returns the 'value' member of @qf unchanged.
+ */
+double qfloat_get_double(const QFloat *qf)
+{
+ return qf->value;
+}
+
+/**
+ * qobject_to_qfloat(): Convert a QObject into a QFloat
+ *
+ * Returns NULL when @obj is not of type QTYPE_QFLOAT.
+ */
+QFloat *qobject_to_qfloat(const QObject *obj)
+{
+    if (qobject_type(obj) == QTYPE_QFLOAT) {
+        return container_of(obj, QFloat, base);
+    }
+
+    return NULL;
+}
+
+/**
+ * qfloat_destroy_obj(): Destroy callback for QFloat objects
+ *
+ * Frees the QFloat that @obj converts to. @obj must not be NULL.
+ */
+static void qfloat_destroy_obj(QObject *obj)
+{
+    QFloat *doomed;
+
+    assert(obj != NULL);
+    doomed = qobject_to_qfloat(obj);
+    g_free(doomed);
+}
diff --git a/contrib/qemu/qobject/qint.c b/contrib/qemu/qobject/qint.c
new file mode 100644
index 000000000..86b9b04f0
--- /dev/null
+++ b/contrib/qemu/qobject/qint.c
@@ -0,0 +1,67 @@
+/*
+ * QInt Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu-common.h"
+
+static void qint_destroy_obj(QObject *obj);
+
+/* Type descriptor for QInt objects: type code plus destroy callback. */
+static const QType qint_type = {
+ .code = QTYPE_QINT,
+ .destroy = qint_destroy_obj,
+};
+
+/**
+ * qint_from_int(): Wrap an int64_t in a newly allocated QInt
+ *
+ * Return strong reference.
+ */
+QInt *qint_from_int(int64_t value)
+{
+    QInt *boxed = g_malloc(sizeof(*boxed));
+
+    boxed->value = value;
+    QOBJECT_INIT(boxed, &qint_type);
+
+    return boxed;
+}
+
+/**
+ * qint_get_int(): Get the stored integer
+ *
+ * Returns the 'value' member of @qi unchanged.
+ */
+int64_t qint_get_int(const QInt *qi)
+{
+ return qi->value;
+}
+
+/**
+ * qobject_to_qint(): Convert a QObject into a QInt
+ *
+ * Returns NULL when @obj is not of type QTYPE_QINT.
+ */
+QInt *qobject_to_qint(const QObject *obj)
+{
+    if (qobject_type(obj) == QTYPE_QINT) {
+        return container_of(obj, QInt, base);
+    }
+
+    return NULL;
+}
+
+/**
+ * qint_destroy_obj(): Destroy callback for QInt objects
+ *
+ * Frees the QInt that @obj converts to. @obj must not be NULL.
+ */
+static void qint_destroy_obj(QObject *obj)
+{
+    QInt *doomed;
+
+    assert(obj != NULL);
+    doomed = qobject_to_qint(obj);
+    g_free(doomed);
+}
diff --git a/contrib/qemu/qobject/qjson.c b/contrib/qemu/qobject/qjson.c
new file mode 100644
index 000000000..19085a1bb
--- /dev/null
+++ b/contrib/qemu/qobject/qjson.c
@@ -0,0 +1,282 @@
+/*
+ * QObject JSON integration
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qapi/qmp/json-lexer.h"
+#include "qapi/qmp/json-parser.h"
+#include "qapi/qmp/json-streamer.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qfloat.h"
+#include "qapi/qmp/qdict.h"
+
+/*
+ * State shared between qobject_from_jsonv() and its parse_json() callback.
+ * The callback recovers this structure from the embedded 'parser' member,
+ * passes 'ap' on to json_parser_parse() and stores the outcome in 'result'.
+ */
+typedef struct JSONParsingState
+{
+ JSONMessageParser parser;
+ va_list *ap;
+ QObject *result;
+} JSONParsingState;
+
+/*
+ * Message-parser callback: parse @tokens and keep the resulting object in
+ * the JSONParsingState that embeds @parser.
+ */
+static void parse_json(JSONMessageParser *parser, QList *tokens)
+{
+    JSONParsingState *state;
+
+    state = container_of(parser, JSONParsingState, parser);
+    state->result = json_parser_parse(tokens, state->ap);
+}
+
+/*
+ * qobject_from_jsonv(): Parse the JSON text in @string into a QObject.
+ *
+ * @ap is handed through to the parser unchanged (qobject_from_json()
+ * passes NULL). Returns whatever parse_json() stored in the state, which
+ * stays NULL if the callback never stored an object.
+ */
+QObject *qobject_from_jsonv(const char *string, va_list *ap)
+{
+    /* Members not named here (parser, result) start out zeroed. */
+    JSONParsingState state = { .ap = ap };
+    JSONMessageParser *parser = &state.parser;
+
+    json_message_parser_init(parser, parse_json);
+    json_message_parser_feed(parser, string, strlen(string));
+    json_message_parser_flush(parser);
+    json_message_parser_destroy(parser);
+
+    return state.result;
+}
+
+/* Parse @string as JSON with no variadic arguments (passes a NULL va_list). */
+QObject *qobject_from_json(const char *string)
+{
+ return qobject_from_jsonv(string, NULL);
+}
+
+/*
+ * qobject_from_jsonf(): Variadic front end to qobject_from_jsonv().
+ *
+ * IMPORTANT: This function aborts on error (the result is asserted to be
+ * non-NULL), thus it must not be used with untrusted arguments.
+ */
+QObject *qobject_from_jsonf(const char *string, ...)
+{
+    QObject *parsed;
+    va_list args;
+
+    va_start(args, string);
+    parsed = qobject_from_jsonv(string, &args);
+    va_end(args);
+
+    assert(parsed != NULL);
+    return parsed;
+}
+
+/*
+ * Per-container state handed to the to_json_*_iter() callbacks:
+ *  indent - number of indent strings emitted before each element (pretty)
+ *  pretty - non-zero to put every element on its own indented line
+ *  count  - elements emitted so far; non-zero means a ", " is needed first
+ *  str    - output string being appended to
+ */
+typedef struct ToJsonIterState
+{
+ int indent;
+ int pretty;
+ int count;
+ QString *str;
+} ToJsonIterState;
+
+static void to_json(const QObject *obj, QString *str, int pretty, int indent);
+
+/*
+ * qdict_iter() callback used by to_json(): appends one "key: value" member
+ * to the output, preceded by ", " for every member but the first and, in
+ * pretty mode, by a newline plus one indent string per nesting level.
+ */
+static void to_json_dict_iter(const char *key, QObject *obj, void *opaque)
+{
+    ToJsonIterState *st = opaque;
+    QString *key_str;
+    int level;
+
+    if (st->count) {
+        qstring_append(st->str, ", ");
+    }
+
+    if (st->pretty) {
+        qstring_append(st->str, "\n");
+        for (level = 0; level < st->indent; level++) {
+            qstring_append(st->str, " ");
+        }
+    }
+
+    /* Emit the key through to_json() so it is quoted and escaped like any
+     * other string value. */
+    key_str = qstring_from_str(key);
+    to_json(QOBJECT(key_str), st->str, st->pretty, st->indent);
+    QDECREF(key_str);
+
+    qstring_append(st->str, ": ");
+    to_json(obj, st->str, st->pretty, st->indent);
+    st->count++;
+}
+
+/*
+ * qlist_iter() callback used by to_json(): appends one list element to the
+ * output, preceded by ", " for every element but the first and, in pretty
+ * mode, by a newline plus one indent string per nesting level.
+ */
+static void to_json_list_iter(QObject *obj, void *opaque)
+{
+    ToJsonIterState *st = opaque;
+    int level;
+
+    if (st->count) {
+        qstring_append(st->str, ", ");
+    }
+
+    if (st->pretty) {
+        qstring_append(st->str, "\n");
+        for (level = 0; level < st->indent; level++) {
+            qstring_append(st->str, " ");
+        }
+    }
+
+    to_json(obj, st->str, st->pretty, st->indent);
+    st->count++;
+}
+
+/*
+ * to_json(): Append the JSON text for @obj to @str.
+ *
+ * @pretty: non-zero to put container elements on separate, indented lines
+ * @indent: nesting depth of @obj; children are emitted at @indent + 1
+ *
+ * Integers, strings, dicts, lists, floats and booleans are serialised;
+ * QTYPE_QERROR and QTYPE_NONE objects produce no output.
+ */
+static void to_json(const QObject *obj, QString *str, int pretty, int indent)
+{
+    switch (qobject_type(obj)) {
+    case QTYPE_QINT: {
+        QInt *val = qobject_to_qint(obj);
+        char buffer[1024];
+
+        snprintf(buffer, sizeof(buffer), "%" PRId64, qint_get_int(val));
+        qstring_append(str, buffer);
+        break;
+    }
+    case QTYPE_QSTRING: {
+        QString *val = qobject_to_qstring(obj);
+        const char *ptr;
+        int cp;
+        char buf[16];
+        char *end;
+
+        ptr = qstring_get_str(val);
+        qstring_append(str, "\"");
+
+        /* Walk the string one code point at a time; mod_utf8_codepoint()
+         * leaves @end just past the bytes it consumed. */
+        for (; *ptr; ptr = end) {
+            cp = mod_utf8_codepoint(ptr, 6, &end);
+            switch (cp) {
+            case '\"':
+                qstring_append(str, "\\\"");
+                break;
+            case '\\':
+                qstring_append(str, "\\\\");
+                break;
+            case '\b':
+                qstring_append(str, "\\b");
+                break;
+            case '\f':
+                qstring_append(str, "\\f");
+                break;
+            case '\n':
+                qstring_append(str, "\\n");
+                break;
+            case '\r':
+                qstring_append(str, "\\r");
+                break;
+            case '\t':
+                qstring_append(str, "\\t");
+                break;
+            default:
+                if (cp < 0) {
+                    cp = 0xFFFD; /* replacement character */
+                }
+                if (cp > 0xFFFF) {
+                    /* beyond BMP; need a surrogate pair */
+                    snprintf(buf, sizeof(buf), "\\u%04X\\u%04X",
+                             0xD800 + ((cp - 0x10000) >> 10),
+                             0xDC00 + ((cp - 0x10000) & 0x3FF));
+                } else if (cp < 0x20 || cp >= 0x7F) {
+                    /* control characters and everything non-ASCII */
+                    snprintf(buf, sizeof(buf), "\\u%04X", cp);
+                } else {
+                    buf[0] = cp;
+                    buf[1] = 0;
+                }
+                qstring_append(str, buf);
+            }
+        }
+
+        qstring_append(str, "\"");
+        break;
+    }
+    case QTYPE_QDICT: {
+        ToJsonIterState s;
+        QDict *val = qobject_to_qdict(obj);
+
+        s.count = 0;
+        s.str = str;
+        s.indent = indent + 1;
+        s.pretty = pretty;
+        qstring_append(str, "{");
+        qdict_iter(val, to_json_dict_iter, &s);
+        if (pretty) {
+            int j;
+            qstring_append(str, "\n");
+            for (j = 0 ; j < indent ; j++)
+                qstring_append(str, " ");
+        }
+        qstring_append(str, "}");
+        break;
+    }
+    case QTYPE_QLIST: {
+        ToJsonIterState s;
+        QList *val = qobject_to_qlist(obj);
+
+        s.count = 0;
+        s.str = str;
+        s.indent = indent + 1;
+        s.pretty = pretty;
+        qstring_append(str, "[");
+        /* The callback already has the exact type qlist_iter() expects,
+         * so it is passed without any cast. */
+        qlist_iter(val, to_json_list_iter, &s);
+        if (pretty) {
+            int j;
+            qstring_append(str, "\n");
+            for (j = 0 ; j < indent ; j++)
+                qstring_append(str, " ");
+        }
+        qstring_append(str, "]");
+        break;
+    }
+    case QTYPE_QFLOAT: {
+        QFloat *val = qobject_to_qfloat(obj);
+        char buffer[1024];
+        int len;
+
+        len = snprintf(buffer, sizeof(buffer), "%f", qfloat_get_double(val));
+        /* Strip trailing zeros, and the decimal point too if nothing is
+         * left behind it. */
+        while (len > 0 && buffer[len - 1] == '0') {
+            len--;
+        }
+
+        if (len && buffer[len - 1] == '.') {
+            buffer[len - 1] = 0;
+        } else {
+            buffer[len] = 0;
+        }
+
+        qstring_append(str, buffer);
+        break;
+    }
+    case QTYPE_QBOOL: {
+        QBool *val = qobject_to_qbool(obj);
+
+        if (qbool_get_int(val)) {
+            qstring_append(str, "true");
+        } else {
+            qstring_append(str, "false");
+        }
+        break;
+    }
+    case QTYPE_QERROR:
+        /* XXX: should QError be emitted? */
+    case QTYPE_NONE:
+        break;
+    }
+}
+
+/* Serialise @obj to compact (non-pretty) JSON in a newly created QString. */
+QString *qobject_to_json(const QObject *obj)
+{
+    QString *json = qstring_new();
+
+    to_json(obj, json, 0, 0);
+    return json;
+}
+
+/* Serialise @obj to pretty-printed JSON in a newly created QString. */
+QString *qobject_to_json_pretty(const QObject *obj)
+{
+    QString *json = qstring_new();
+
+    to_json(obj, json, 1, 0);
+    return json;
+}
diff --git a/contrib/qemu/qobject/qlist.c b/contrib/qemu/qobject/qlist.c
new file mode 100644
index 000000000..1ced0de58
--- /dev/null
+++ b/contrib/qemu/qobject/qlist.c
@@ -0,0 +1,170 @@
+/*
+ * QList Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qobject.h"
+#include "qemu/queue.h"
+#include "qemu-common.h"
+
+static void qlist_destroy_obj(QObject *obj);
+
+/* Type descriptor for QList objects: type code plus destroy callback. */
+static const QType qlist_type = {
+ .code = QTYPE_QLIST,
+ .destroy = qlist_destroy_obj,
+};
+
+/**
+ * qlist_new(): Allocate an empty QList
+ *
+ * Return strong reference.
+ */
+QList *qlist_new(void)
+{
+    QList *fresh = g_malloc(sizeof(*fresh));
+
+    QTAILQ_INIT(&fresh->head);
+    QOBJECT_INIT(fresh, &qlist_type);
+
+    return fresh;
+}
+
+/*
+ * qlist_iter() callback for qlist_copy(): takes an extra reference on @obj
+ * and appends it to the destination list passed in @opaque.
+ */
+static void qlist_copy_elem(QObject *obj, void *opaque)
+{
+    QList *target = opaque;
+
+    qobject_incref(obj);
+    qlist_append_obj(target, obj);
+}
+
+/*
+ * qlist_copy(): Create a new QList holding the same objects as @src.
+ * The objects are shared; each one gains a reference (see qlist_copy_elem()).
+ */
+QList *qlist_copy(QList *src)
+{
+    QList *duplicate = qlist_new();
+
+    qlist_iter(src, qlist_copy_elem, duplicate);
+    return duplicate;
+}
+
+/**
+ * qlist_append_obj(): Append a QObject to the tail of a QList
+ *
+ * NOTE: ownership of 'value' is transferred to the QList
+ */
+void qlist_append_obj(QList *qlist, QObject *value)
+{
+    QListEntry *node = g_malloc(sizeof(*node));
+
+    node->value = value;
+    QTAILQ_INSERT_TAIL(&qlist->head, node, next);
+}
+
+/**
+ * qlist_iter(): Call a user-supplied function on every stored value
+ *
+ * @iter is invoked once per list entry, in list order, with the stored
+ * object and the caller's @opaque pointer.
+ */
+void qlist_iter(const QList *qlist,
+                void (*iter)(QObject *obj, void *opaque), void *opaque)
+{
+    QListEntry *cur;
+
+    QTAILQ_FOREACH(cur, &qlist->head, next) {
+        iter(cur->value, opaque);
+    }
+}
+
+/*
+ * qlist_pop(): Detach the first entry of @qlist and return its object.
+ * The list's reference is not dropped, so it passes to the caller.
+ * Returns NULL when @qlist is NULL or empty.
+ */
+QObject *qlist_pop(QList *qlist)
+{
+    QListEntry *first;
+    QObject *popped;
+
+    if (qlist == NULL) {
+        return NULL;
+    }
+    if (QTAILQ_EMPTY(&qlist->head)) {
+        return NULL;
+    }
+
+    first = QTAILQ_FIRST(&qlist->head);
+    popped = first->value;
+
+    QTAILQ_REMOVE(&qlist->head, first, next);
+    g_free(first);
+
+    return popped;
+}
+
+/*
+ * qlist_peek(): Return the object held by the first entry of @qlist
+ * without removing it and without taking a reference.
+ * Returns NULL when @qlist is NULL or empty.
+ */
+QObject *qlist_peek(QList *qlist)
+{
+    QListEntry *first;
+
+    if (qlist == NULL || QTAILQ_EMPTY(&qlist->head)) {
+        return NULL;
+    }
+
+    first = QTAILQ_FIRST(&qlist->head);
+    return first->value;
+}
+
+/* Returns non-zero when @qlist has no entries (QTAILQ_EMPTY on its head). */
+int qlist_empty(const QList *qlist)
+{
+ return QTAILQ_EMPTY(&qlist->head);
+}
+
+/* qlist_iter() callback for qlist_size(): bumps the size_t behind @opaque. */
+static void qlist_size_iter(QObject *obj, void *opaque)
+{
+    size_t *tally = opaque;
+
+    *tally += 1;
+}
+
+/* qlist_size(): Count the entries of @qlist by walking the whole list. */
+size_t qlist_size(const QList *qlist)
+{
+    size_t entries = 0;
+
+    qlist_iter(qlist, qlist_size_iter, &entries);
+
+    return entries;
+}
+
+/**
+ * qobject_to_qlist(): Convert a QObject into a QList
+ *
+ * Returns NULL when @obj is not of type QTYPE_QLIST.
+ */
+QList *qobject_to_qlist(const QObject *obj)
+{
+    if (qobject_type(obj) == QTYPE_QLIST) {
+        return container_of(obj, QList, base);
+    }
+
+    return NULL;
+}
+
+/**
+ * qlist_destroy_obj(): Destroy callback for QList objects
+ *
+ * Unlinks every entry, drops the list's reference on the stored object and
+ * frees the entry, then frees the QList itself. @obj must not be NULL.
+ */
+static void qlist_destroy_obj(QObject *obj)
+{
+    QList *qlist;
+
+    assert(obj != NULL);
+    qlist = qobject_to_qlist(obj);
+
+    /* Consume the list from the front until nothing is left. */
+    while (!QTAILQ_EMPTY(&qlist->head)) {
+        QListEntry *first = QTAILQ_FIRST(&qlist->head);
+
+        QTAILQ_REMOVE(&qlist->head, first, next);
+        qobject_decref(first->value);
+        g_free(first);
+    }
+
+    g_free(qlist);
+}
diff --git a/contrib/qemu/qobject/qstring.c b/contrib/qemu/qobject/qstring.c
new file mode 100644
index 000000000..607b7a142
--- /dev/null
+++ b/contrib/qemu/qobject/qstring.c
@@ -0,0 +1,149 @@
+/*
+ * QString Module
+ *
+ * Copyright (C) 2009 Red Hat Inc.
+ *
+ * Authors:
+ * Luiz Capitulino <lcapitulino@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qapi/qmp/qobject.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu-common.h"
+
+static void qstring_destroy_obj(QObject *obj);
+
+/* Type descriptor for QString objects: type code plus destroy callback. */
+static const QType qstring_type = {
+ .code = QTYPE_QSTRING,
+ .destroy = qstring_destroy_obj,
+};
+
+/**
+ * qstring_new(): Create a new empty QString
+ *
+ * Implemented as qstring_from_str("").
+ *
+ * Return strong reference.
+ */
+QString *qstring_new(void)
+{
+ return qstring_from_str("");
+}
+
+/**
+ * qstring_get_length(): Get the length of a QString
+ *
+ * Returns the stored 'length' member, which the functions in this file
+ * keep equal to the number of characters before the terminating NUL.
+ */
+size_t qstring_get_length(const QString *qstring)
+{
+ return qstring->length;
+}
+
+/**
+ * qstring_from_substr(): Create a new QString from a C string substring
+ *
+ * Copies the characters str[start] .. str[end] (both inclusive) and
+ * NUL-terminates the copy.
+ *
+ * Return strong reference.
+ */
+QString *qstring_from_substr(const char *str, int start, int end)
+{
+    QString *qs = g_malloc(sizeof(*qs));
+
+    /* @end is inclusive, hence the + 1. */
+    qs->length = end - start + 1;
+    qs->capacity = qs->length;
+
+    /* One byte beyond the capacity is reserved for the terminator. */
+    qs->string = g_malloc(qs->capacity + 1);
+    memcpy(qs->string, str + start, qs->length);
+    qs->string[qs->length] = 0;
+
+    QOBJECT_INIT(qs, &qstring_type);
+
+    return qs;
+}
+
+/**
+ * qstring_from_str(): Create a new QString from a regular C string
+ *
+ * The whole of @str is copied. For an empty string the inclusive end index
+ * passed to qstring_from_substr() is -1, which yields a zero-length QString.
+ *
+ * Return strong reference.
+ */
+QString *qstring_from_str(const char *str)
+{
+    /*
+     * Convert the length to int before subtracting: doing "strlen(str) - 1"
+     * in size_t wraps to SIZE_MAX for "" and then depends on an
+     * implementation-defined conversion to the int parameter.
+     */
+    int last = (int)strlen(str) - 1;
+
+    return qstring_from_substr(str, 0, last);
+}
+
+/*
+ * Make sure @qstring can take @len more characters. When the current
+ * capacity is too small it is raised to twice (capacity + len) and the
+ * buffer is reallocated with one extra byte for the terminator.
+ */
+static void capacity_increase(QString *qstring, size_t len)
+{
+    if (qstring->capacity >= (qstring->length + len)) {
+        return; /* already large enough */
+    }
+
+    qstring->capacity += len;
+    qstring->capacity *= 2; /* use exponential growth */
+
+    qstring->string = g_realloc(qstring->string, qstring->capacity + 1);
+}
+
+/**
+ * qstring_append(): Append a C string to a QString
+ *
+ * Grows the buffer if needed, copies @str after the current contents and
+ * re-terminates the result.
+ */
+void qstring_append(QString *qstring, const char *str)
+{
+    size_t extra = strlen(str);
+
+    capacity_increase(qstring, extra);
+
+    memcpy(qstring->string + qstring->length, str, extra);
+    qstring->length += extra;
+    qstring->string[qstring->length] = 0;
+}
+
+/**
+ * qstring_append_int(): Append the decimal text of an int64_t to a QString
+ */
+void qstring_append_int(QString *qstring, int64_t value)
+{
+    char digits[32];
+
+    snprintf(digits, sizeof(digits), "%" PRId64, value);
+    qstring_append(qstring, digits);
+}
+
+/**
+ * qstring_append_chr(): Append a single C char to a QString
+ *
+ * Grows the buffer if needed, stores @c at the end and re-terminates.
+ */
+void qstring_append_chr(QString *qstring, int c)
+{
+    capacity_increase(qstring, 1);
+
+    qstring->string[qstring->length] = c;
+    qstring->length++;
+    qstring->string[qstring->length] = 0;
+}
+
+/**
+ * qobject_to_qstring(): Convert a QObject to a QString
+ *
+ * Returns NULL when @obj is not of type QTYPE_QSTRING.
+ */
+QString *qobject_to_qstring(const QObject *obj)
+{
+    if (qobject_type(obj) == QTYPE_QSTRING) {
+        return container_of(obj, QString, base);
+    }
+
+    return NULL;
+}
+
+/**
+ * qstring_get_str(): Return a pointer to the stored string
+ *
+ * The pointer refers to the QString's own buffer; no copy is made.
+ *
+ * NOTE: Should be used with caution, if the object is deallocated
+ * this pointer becomes invalid. Appending to the QString may also
+ * reallocate the buffer (see capacity_increase()).
+ */
+const char *qstring_get_str(const QString *qstring)
+{
+ return qstring->string;
+}
+
+/**
+ * qstring_destroy_obj(): Destroy callback for QString objects
+ *
+ * Frees the character buffer and then the QString itself. @obj must not be
+ * NULL.
+ */
+static void qstring_destroy_obj(QObject *obj)
+{
+    QString *doomed;
+
+    assert(obj != NULL);
+    doomed = qobject_to_qstring(obj);
+
+    g_free(doomed->string);
+    g_free(doomed);
+}
diff --git a/contrib/qemu/trace/generated-tracers.h b/contrib/qemu/trace/generated-tracers.h
new file mode 100644
index 000000000..b512660f3
--- /dev/null
+++ b/contrib/qemu/trace/generated-tracers.h
@@ -0,0 +1,3759 @@
+/* This file is autogenerated by tracetool, do not edit. */
+
+#ifndef TRACE__GENERATED_TRACERS_H
+#define TRACE__GENERATED_TRACERS_H
+
+#include "qemu-common.h"
+
+/*
+ * Trace stubs for the qxl_* and allocator (g_malloc/g_realloc/g_free,
+ * qemu_memalign, qemu_vfree, qemu_anon_ram_*) events. Every body is empty,
+ * so calling one of these does nothing.
+ */
+static inline void trace_qxl_interface_set_mm_time(int qid, uint32_t mm_time)
+{
+}
+
+static inline void trace_qxl_io_write_vga(int qid, const char * mode, uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_g_malloc(size_t size, void * ptr)
+{
+}
+
+static inline void trace_g_realloc(void * ptr, size_t size, void * newptr)
+{
+}
+
+static inline void trace_g_free(void * ptr)
+{
+}
+
+static inline void trace_qemu_memalign(size_t alignment, size_t size, void * ptr)
+{
+}
+
+static inline void trace_qemu_anon_ram_alloc(size_t size, void * ptr)
+{
+}
+
+static inline void trace_qemu_vfree(void * ptr)
+{
+}
+
+static inline void trace_qemu_anon_ram_free(void * ptr, size_t size)
+{
+}
+
+/*
+ * Trace stubs for the virtqueue_*, virtio_*, virtio_serial_* and
+ * virtio_console_* events. Every body is empty, so these do nothing.
+ */
+static inline void trace_virtqueue_fill(void * vq, const void * elem, unsigned int len, unsigned int idx)
+{
+}
+
+static inline void trace_virtqueue_flush(void * vq, unsigned int count)
+{
+}
+
+static inline void trace_virtqueue_pop(void * vq, void * elem, unsigned int in_num, unsigned int out_num)
+{
+}
+
+static inline void trace_virtio_queue_notify(void * vdev, int n, void * vq)
+{
+}
+
+static inline void trace_virtio_irq(void * vq)
+{
+}
+
+static inline void trace_virtio_notify(void * vdev, void * vq)
+{
+}
+
+static inline void trace_virtio_set_status(void * vdev, uint8_t val)
+{
+}
+
+static inline void trace_virtio_serial_send_control_event(unsigned int port, uint16_t event, uint16_t value)
+{
+}
+
+static inline void trace_virtio_serial_throttle_port(unsigned int port, bool throttle)
+{
+}
+
+static inline void trace_virtio_serial_handle_control_message(uint16_t event, uint16_t value)
+{
+}
+
+static inline void trace_virtio_serial_handle_control_message_port(unsigned int port)
+{
+}
+
+static inline void trace_virtio_console_flush_buf(unsigned int port, size_t len, ssize_t ret)
+{
+}
+
+static inline void trace_virtio_console_chr_read(unsigned int port, int size)
+{
+}
+
+static inline void trace_virtio_console_chr_event(unsigned int port, int event)
+{
+}
+
+/*
+ * Trace stubs for the bdrv_* and multiwrite_cb events. Every body is
+ * empty, so these do nothing.
+ */
+static inline void trace_bdrv_open_common(void * bs, const char * filename, int flags, const char * format_name)
+{
+}
+
+static inline void trace_multiwrite_cb(void * mcb, int ret)
+{
+}
+
+static inline void trace_bdrv_aio_multiwrite(void * mcb, int num_callbacks, int num_reqs)
+{
+}
+
+static inline void trace_bdrv_aio_discard(void * bs, int64_t sector_num, int nb_sectors, void * opaque)
+{
+}
+
+static inline void trace_bdrv_aio_flush(void * bs, void * opaque)
+{
+}
+
+static inline void trace_bdrv_aio_readv(void * bs, int64_t sector_num, int nb_sectors, void * opaque)
+{
+}
+
+static inline void trace_bdrv_aio_writev(void * bs, int64_t sector_num, int nb_sectors, void * opaque)
+{
+}
+
+static inline void trace_bdrv_lock_medium(void * bs, bool locked)
+{
+}
+
+static inline void trace_bdrv_co_readv(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_copy_on_readv(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_writev(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_write_zeroes(void * bs, int64_t sector_num, int nb_sector)
+{
+}
+
+static inline void trace_bdrv_co_io_em(void * bs, int64_t sector_num, int nb_sectors, int is_write, void * acb)
+{
+}
+
+static inline void trace_bdrv_co_do_copy_on_readv(void * bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors)
+{
+}
+
+/*
+ * Trace stubs for the stream_*, commit_*, mirror_*, backup_*,
+ * qmp_block_job_*, block_job_cb and qmp_block_stream events. Every body is
+ * empty, so these do nothing.
+ */
+static inline void trace_stream_one_iteration(void * s, int64_t sector_num, int nb_sectors, int is_allocated)
+{
+}
+
+static inline void trace_stream_start(void * bs, void * base, void * s, void * co, void * opaque)
+{
+}
+
+static inline void trace_commit_one_iteration(void * s, int64_t sector_num, int nb_sectors, int is_allocated)
+{
+}
+
+static inline void trace_commit_start(void * bs, void * base, void * top, void * s, void * co, void * opaque)
+{
+}
+
+static inline void trace_mirror_start(void * bs, void * s, void * co, void * opaque)
+{
+}
+
+static inline void trace_mirror_restart_iter(void * s, int64_t cnt)
+{
+}
+
+static inline void trace_mirror_before_flush(void * s)
+{
+}
+
+static inline void trace_mirror_before_drain(void * s, int64_t cnt)
+{
+}
+
+static inline void trace_mirror_before_sleep(void * s, int64_t cnt, int synced)
+{
+}
+
+static inline void trace_mirror_one_iteration(void * s, int64_t sector_num, int nb_sectors)
+{
+}
+
+static inline void trace_mirror_cow(void * s, int64_t sector_num)
+{
+}
+
+static inline void trace_mirror_iteration_done(void * s, int64_t sector_num, int nb_sectors, int ret)
+{
+}
+
+static inline void trace_mirror_yield(void * s, int64_t cnt, int buf_free_count, int in_flight)
+{
+}
+
+static inline void trace_mirror_yield_in_flight(void * s, int64_t sector_num, int in_flight)
+{
+}
+
+static inline void trace_mirror_yield_buf_busy(void * s, int nb_chunks, int in_flight)
+{
+}
+
+static inline void trace_mirror_break_buf_busy(void * s, int nb_chunks, int in_flight)
+{
+}
+
+static inline void trace_backup_do_cow_enter(void * job, int64_t start, int64_t sector_num, int nb_sectors)
+{
+}
+
+static inline void trace_backup_do_cow_return(void * job, int64_t sector_num, int nb_sectors, int ret)
+{
+}
+
+static inline void trace_backup_do_cow_skip(void * job, int64_t start)
+{
+}
+
+static inline void trace_backup_do_cow_process(void * job, int64_t start)
+{
+}
+
+static inline void trace_backup_do_cow_read_fail(void * job, int64_t start, int ret)
+{
+}
+
+static inline void trace_backup_do_cow_write_fail(void * job, int64_t start, int ret)
+{
+}
+
+static inline void trace_qmp_block_job_cancel(void * job)
+{
+}
+
+static inline void trace_qmp_block_job_pause(void * job)
+{
+}
+
+static inline void trace_qmp_block_job_resume(void * job)
+{
+}
+
+static inline void trace_qmp_block_job_complete(void * job)
+{
+}
+
+static inline void trace_block_job_cb(void * bs, void * job, int ret)
+{
+}
+
+static inline void trace_qmp_block_stream(void * bs, void * job)
+{
+}
+
+/*
+ * Trace stubs for the virtio_blk_*, vring_setup, thread_pool_* and paio_*
+ * events. Every body is empty, so these do nothing.
+ */
+static inline void trace_virtio_blk_req_complete(void * req, int status)
+{
+}
+
+static inline void trace_virtio_blk_rw_complete(void * req, int ret)
+{
+}
+
+static inline void trace_virtio_blk_handle_write(void * req, uint64_t sector, size_t nsectors)
+{
+}
+
+static inline void trace_virtio_blk_handle_read(void * req, uint64_t sector, size_t nsectors)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_start(void * s)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_stop(void * s)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_process_request(void * s, unsigned int out_num, unsigned int in_num, unsigned int head)
+{
+}
+
+static inline void trace_virtio_blk_data_plane_complete_request(void * s, unsigned int head, int ret)
+{
+}
+
+static inline void trace_vring_setup(uint64_t physical, void * desc, void * avail, void * used)
+{
+}
+
+static inline void trace_thread_pool_submit(void * pool, void * req, void * opaque)
+{
+}
+
+static inline void trace_thread_pool_complete(void * pool, void * req, void * opaque, int ret)
+{
+}
+
+static inline void trace_thread_pool_cancel(void * req, void * opaque)
+{
+}
+
+static inline void trace_paio_submit(void * acb, void * opaque, int64_t sector_num, int nb_sectors, int type)
+{
+}
+
+static inline void trace_paio_complete(void * acb, void * opaque, int ret)
+{
+}
+
+static inline void trace_paio_cancel(void * acb, void * opaque)
+{
+}
+
+/*
+ * Trace stubs for the cpu_in/cpu_out, balloon_event and apic-related
+ * events. Every body is empty, so these do nothing.
+ */
+static inline void trace_cpu_in(unsigned int addr, unsigned int val)
+{
+}
+
+static inline void trace_cpu_out(unsigned int addr, unsigned int val)
+{
+}
+
+static inline void trace_balloon_event(void * opaque, unsigned long addr)
+{
+}
+
+static inline void trace_apic_local_deliver(int vector, uint32_t lvt)
+{
+}
+
+static inline void trace_apic_deliver_irq(uint8_t dest, uint8_t dest_mode, uint8_t delivery_mode, uint8_t vector_num, uint8_t trigger_mode)
+{
+}
+
+static inline void trace_cpu_set_apic_base(uint64_t val)
+{
+}
+
+static inline void trace_cpu_get_apic_base(uint64_t val)
+{
+}
+
+static inline void trace_apic_mem_readl(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_apic_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_apic_report_irq_delivered(int apic_irq_delivered)
+{
+}
+
+static inline void trace_apic_reset_irq_delivered(int apic_irq_delivered)
+{
+}
+
+static inline void trace_apic_get_irq_delivered(int apic_irq_delivered)
+{
+}
+
+/*
+ * Trace stubs for the cs4231_*, nvram_* and ecc_* events. Every body is
+ * empty, so these do nothing.
+ */
+static inline void trace_cs4231_mem_readl_dreg(uint32_t reg, uint32_t ret)
+{
+}
+
+static inline void trace_cs4231_mem_readl_reg(uint32_t reg, uint32_t ret)
+{
+}
+
+static inline void trace_cs4231_mem_writel_reg(uint32_t reg, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_cs4231_mem_writel_dreg(uint32_t reg, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_nvram_read(uint32_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_nvram_write(uint32_t addr, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_mer(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_mdr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_mfsr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_vcr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_dr(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_ecr0(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_writel_ecr1(uint32_t val)
+{
+}
+
+static inline void trace_ecc_mem_readl_mer(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mdr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mfsr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_vcr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mfar0(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_mfar1(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_dr(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_ecr0(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_mem_readl_ecr1(uint32_t ret)
+{
+}
+
+static inline void trace_ecc_diag_mem_writeb(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_ecc_diag_mem_readb(uint64_t addr, uint32_t ret)
+{
+}
+
+/*
+ * Trace stubs for the fw_cfg_*, hd_geometry_*, jazz_led_* and lance_*
+ * events. Every body is empty, so these do nothing.
+ */
+static inline void trace_fw_cfg_write(void * s, uint8_t value)
+{
+}
+
+static inline void trace_fw_cfg_select(void * s, uint16_t key, int ret)
+{
+}
+
+static inline void trace_fw_cfg_read(void * s, uint8_t ret)
+{
+}
+
+static inline void trace_fw_cfg_add_file_dupe(void * s, char * name)
+{
+}
+
+static inline void trace_fw_cfg_add_file(void * s, int index, char * name, size_t len)
+{
+}
+
+static inline void trace_hd_geometry_lchs_guess(void * bs, int cyls, int heads, int secs)
+{
+}
+
+static inline void trace_hd_geometry_guess(void * bs, uint32_t cyls, uint32_t heads, uint32_t secs, int trans)
+{
+}
+
+static inline void trace_jazz_led_read(uint64_t addr, uint8_t val)
+{
+}
+
+static inline void trace_jazz_led_write(uint64_t addr, uint8_t new)
+{
+}
+
+static inline void trace_lance_mem_readw(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_lance_mem_writew(uint64_t addr, uint32_t val)
+{
+}
+
+/*
+ * Trace stubs for the slavio interrupt-controller, misc, sysctrl and led
+ * events, plus apc_mem_*. Every body is empty, so these do nothing.
+ */
+static inline void trace_slavio_intctl_mem_readl(uint32_t cpu, uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_slavio_intctl_mem_writel(uint32_t cpu, uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_slavio_intctl_mem_writel_clear(uint32_t cpu, uint32_t val, uint32_t intreg_pending)
+{
+}
+
+static inline void trace_slavio_intctl_mem_writel_set(uint32_t cpu, uint32_t val, uint32_t intreg_pending)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel_enable(uint32_t val, uint32_t intregm_disabled)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel_disable(uint32_t val, uint32_t intregm_disabled)
+{
+}
+
+static inline void trace_slavio_intctlm_mem_writel_target(uint32_t cpu)
+{
+}
+
+static inline void trace_slavio_check_interrupts(uint32_t pending, uint32_t intregm_disabled)
+{
+}
+
+static inline void trace_slavio_set_irq(uint32_t target_cpu, int irq, uint32_t pil, int level)
+{
+}
+
+static inline void trace_slavio_set_timer_irq_cpu(int cpu, int level)
+{
+}
+
+static inline void trace_slavio_misc_update_irq_raise(void)
+{
+}
+
+static inline void trace_slavio_misc_update_irq_lower(void)
+{
+}
+
+static inline void trace_slavio_set_power_fail(int power_failing, uint8_t config)
+{
+}
+
+static inline void trace_slavio_cfg_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_cfg_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_diag_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_diag_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_mdm_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_mdm_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_aux1_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_aux1_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_aux2_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_slavio_aux2_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_apc_mem_writeb(uint32_t val)
+{
+}
+
+static inline void trace_apc_mem_readb(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_sysctrl_mem_writel(uint32_t val)
+{
+}
+
+static inline void trace_slavio_sysctrl_mem_readl(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_led_mem_writew(uint32_t val)
+{
+}
+
+static inline void trace_slavio_led_mem_readw(uint32_t ret)
+{
+}
+
+static inline void trace_slavio_timer_get_out(uint64_t limit, uint32_t counthigh, uint32_t count)
+{
+}
+
+static inline void trace_slavio_timer_irq(uint32_t counthigh, uint32_t count)
+{
+}
+
+static inline void trace_slavio_timer_mem_readl_invalid(uint64_t addr)
+{
+}
+
+static inline void trace_slavio_timer_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_limit(unsigned int timer_index, uint64_t count)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_counter_invalid(void)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_status_start(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_status_stop(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_mode_user(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_mode_counter(unsigned int timer_index)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_mode_invalid(void)
+{
+}
+
+static inline void trace_slavio_timer_mem_writel_invalid(uint64_t addr)
+{
+}
+
+static inline void trace_ledma_memory_read(uint64_t addr)
+{
+}
+
+static inline void trace_ledma_memory_write(uint64_t addr)
+{
+}
+
+static inline void trace_sparc32_dma_set_irq_raise(void)
+{
+}
+
+static inline void trace_sparc32_dma_set_irq_lower(void)
+{
+}
+
+static inline void trace_espdma_memory_read(uint32_t addr)
+{
+}
+
+static inline void trace_espdma_memory_write(uint32_t addr)
+{
+}
+
+static inline void trace_sparc32_dma_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_sparc32_dma_mem_writel(uint64_t addr, uint32_t old, uint32_t val)
+{
+}
+
+static inline void trace_sparc32_dma_enable_raise(void)
+{
+}
+
+static inline void trace_sparc32_dma_enable_lower(void)
+{
+}
+
+static inline void trace_sun4m_cpu_interrupt(unsigned int level)
+{
+}
+
+static inline void trace_sun4m_cpu_reset_interrupt(unsigned int level)
+{
+}
+
+static inline void trace_sun4m_cpu_set_irq_raise(int level)
+{
+}
+
+static inline void trace_sun4m_cpu_set_irq_lower(int level)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_readl(uint64_t addr, uint32_t ret)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel_ctrl(uint64_t iostart)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel_tlbflush(uint32_t val)
+{
+}
+
+static inline void trace_sun4m_iommu_mem_writel_pgflush(uint32_t val)
+{
+}
+
+static inline void trace_sun4m_iommu_page_get_flags(uint64_t pa, uint64_t iopte, uint32_t ret)
+{
+}
+
+static inline void trace_sun4m_iommu_translate_pa(uint64_t addr, uint64_t pa, uint32_t iopte)
+{
+}
+
+static inline void trace_sun4m_iommu_bad_addr(uint64_t addr)
+{
+}
+
+static inline void trace_usb_packet_state_change(int bus, const char * port, int ep, void * p, const char * o, const char * n)
+{
+}
+
+static inline void trace_usb_packet_state_fault(int bus, const char * port, int ep, void * p, const char * o, const char * n)
+{
+}
+
+static inline void trace_usb_port_claim(int bus, const char * port)
+{
+}
+
+static inline void trace_usb_port_attach(int bus, const char * port, const char * devspeed, const char * portspeed)
+{
+}
+
+static inline void trace_usb_port_detach(int bus, const char * port)
+{
+}
+
+static inline void trace_usb_port_release(int bus, const char * port)
+{
+}
+
+static inline void trace_usb_ehci_reset(void)
+{
+}
+
+static inline void trace_usb_ehci_opreg_read(uint32_t addr, const char * str, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_opreg_write(uint32_t addr, const char * str, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_opreg_change(uint32_t addr, const char * str, uint32_t new, uint32_t old)
+{
+}
+
+static inline void trace_usb_ehci_portsc_read(uint32_t addr, uint32_t port, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_portsc_write(uint32_t addr, uint32_t port, uint32_t val)
+{
+}
+
+static inline void trace_usb_ehci_portsc_change(uint32_t addr, uint32_t port, uint32_t new, uint32_t old)
+{
+}
+
+static inline void trace_usb_ehci_usbsts(const char * sts, int state)
+{
+}
+
+static inline void trace_usb_ehci_state(const char * schedule, const char * state)
+{
+}
+
+static inline void trace_usb_ehci_qh_ptrs(void * q, uint32_t addr, uint32_t nxt, uint32_t c_qtd, uint32_t n_qtd, uint32_t a_qtd)
+{
+}
+
+static inline void trace_usb_ehci_qh_fields(uint32_t addr, int rl, int mplen, int eps, int ep, int devaddr)
+{
+}
+
+static inline void trace_usb_ehci_qh_bits(uint32_t addr, int c, int h, int dtc, int i)
+{
+}
+
+static inline void trace_usb_ehci_qtd_ptrs(void * q, uint32_t addr, uint32_t nxt, uint32_t altnext)
+{
+}
+
+static inline void trace_usb_ehci_qtd_fields(uint32_t addr, int tbytes, int cpage, int cerr, int pid)
+{
+}
+
+static inline void trace_usb_ehci_qtd_bits(uint32_t addr, int ioc, int active, int halt, int babble, int xacterr)
+{
+}
+
+static inline void trace_usb_ehci_itd(uint32_t addr, uint32_t nxt, uint32_t mplen, uint32_t mult, uint32_t ep, uint32_t devaddr)
+{
+}
+
+static inline void trace_usb_ehci_sitd(uint32_t addr, uint32_t nxt, uint32_t active)
+{
+}
+
+static inline void trace_usb_ehci_port_attach(uint32_t port, const char * owner, const char * device)
+{
+}
+
+static inline void trace_usb_ehci_port_detach(uint32_t port, const char * owner)
+{
+}
+
+static inline void trace_usb_ehci_port_reset(uint32_t port, int enable)
+{
+}
+
+static inline void trace_usb_ehci_data(int rw, uint32_t cpage, uint32_t offset, uint32_t addr, uint32_t len, uint32_t bufpos)
+{
+}
+
+static inline void trace_usb_ehci_queue_action(void * q, const char * action)
+{
+}
+
+static inline void trace_usb_ehci_packet_action(void * q, void * p, const char * action)
+{
+}
+
+static inline void trace_usb_ehci_irq(uint32_t level, uint32_t frindex, uint32_t sts, uint32_t mask)
+{
+}
+
+static inline void trace_usb_ehci_guest_bug(const char * reason)
+{
+}
+
+static inline void trace_usb_ehci_doorbell_ring(void)
+{
+}
+
+static inline void trace_usb_ehci_doorbell_ack(void)
+{
+}
+
+static inline void trace_usb_ehci_dma_error(void)
+{
+}
+
+static inline void trace_usb_uhci_reset(void)
+{
+}
+
+static inline void trace_usb_uhci_schedule_start(void)
+{
+}
+
+static inline void trace_usb_uhci_schedule_stop(void)
+{
+}
+
+static inline void trace_usb_uhci_frame_start(uint32_t num)
+{
+}
+
+static inline void trace_usb_uhci_frame_stop_bandwidth(void)
+{
+}
+
+static inline void trace_usb_uhci_frame_loop_stop_idle(void)
+{
+}
+
+static inline void trace_usb_uhci_frame_loop_continue(void)
+{
+}
+
+static inline void trace_usb_uhci_mmio_readw(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_usb_uhci_mmio_writew(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_usb_uhci_queue_add(uint32_t token)
+{
+}
+
+static inline void trace_usb_uhci_queue_del(uint32_t token, const char * reason)
+{
+}
+
+static inline void trace_usb_uhci_packet_add(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_link_async(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_unlink_async(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_cancel(uint32_t token, uint32_t addr, int done)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_success(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_shortxfer(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_stall(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_babble(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_complete_error(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_packet_del(uint32_t token, uint32_t addr)
+{
+}
+
+static inline void trace_usb_uhci_qh_load(uint32_t qh)
+{
+}
+
+static inline void trace_usb_uhci_td_load(uint32_t qh, uint32_t td, uint32_t ctrl, uint32_t token)
+{
+}
+
+static inline void trace_usb_uhci_td_queue(uint32_t td, uint32_t ctrl, uint32_t token)
+{
+}
+
+static inline void trace_usb_uhci_td_nextqh(uint32_t qh, uint32_t td)
+{
+}
+
+static inline void trace_usb_uhci_td_async(uint32_t qh, uint32_t td)
+{
+}
+
+static inline void trace_usb_uhci_td_complete(uint32_t qh, uint32_t td)
+{
+}
+
+static inline void trace_usb_xhci_reset(void)
+{
+}
+
+static inline void trace_usb_xhci_run(void)
+{
+}
+
+static inline void trace_usb_xhci_stop(void)
+{
+}
+
+static inline void trace_usb_xhci_cap_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_oper_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_port_read(uint32_t port, uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_runtime_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_doorbell_read(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_oper_write(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_port_write(uint32_t port, uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_runtime_write(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_doorbell_write(uint32_t off, uint32_t val)
+{
+}
+
+static inline void trace_usb_xhci_irq_intx(uint32_t level)
+{
+}
+
+static inline void trace_usb_xhci_irq_msi(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_irq_msix(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_irq_msix_use(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_irq_msix_unuse(uint32_t nr)
+{
+}
+
+static inline void trace_usb_xhci_queue_event(uint32_t vector, uint32_t idx, const char * trb, const char * evt, uint64_t param, uint32_t status, uint32_t control)
+{
+}
+
+static inline void trace_usb_xhci_fetch_trb(uint64_t addr, const char * name, uint64_t param, uint32_t status, uint32_t control)
+{
+}
+
+static inline void trace_usb_xhci_port_reset(uint32_t port)
+{
+}
+
+static inline void trace_usb_xhci_port_link(uint32_t port, uint32_t pls)
+{
+}
+
+static inline void trace_usb_xhci_port_notify(uint32_t port, uint32_t pls)
+{
+}
+
+static inline void trace_usb_xhci_slot_enable(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_disable(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_address(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_configure(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_evaluate(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_slot_reset(uint32_t slotid)
+{
+}
+
+static inline void trace_usb_xhci_ep_enable(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_ep_disable(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_ep_set_dequeue(uint32_t slotid, uint32_t epid, uint32_t streamid, uint64_t param)
+{
+}
+
+static inline void trace_usb_xhci_ep_kick(uint32_t slotid, uint32_t epid, uint32_t streamid)
+{
+}
+
+static inline void trace_usb_xhci_ep_stop(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_ep_reset(uint32_t slotid, uint32_t epid)
+{
+}
+
+static inline void trace_usb_xhci_xfer_start(void * xfer, uint32_t slotid, uint32_t epid, uint32_t streamid)
+{
+}
+
+static inline void trace_usb_xhci_xfer_async(void * xfer)
+{
+}
+
+static inline void trace_usb_xhci_xfer_nak(void * xfer)
+{
+}
+
+static inline void trace_usb_xhci_xfer_retry(void * xfer)
+{
+}
+
+static inline void trace_usb_xhci_xfer_success(void * xfer, uint32_t bytes)
+{
+}
+
+static inline void trace_usb_xhci_xfer_error(void * xfer, uint32_t ret)
+{
+}
+
+static inline void trace_usb_xhci_unimplemented(const char * item, int nr)
+{
+}
+
+static inline void trace_usb_desc_device(int addr, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_device_qualifier(int addr, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_config(int addr, int index, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_other_speed_config(int addr, int index, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_string(int addr, int index, int len, int ret)
+{
+}
+
+static inline void trace_usb_desc_bos(int addr, int len, int ret)
+{
+}
+
+static inline void trace_usb_set_addr(int addr)
+{
+}
+
+static inline void trace_usb_set_config(int addr, int config, int ret)
+{
+}
+
+static inline void trace_usb_set_interface(int addr, int iface, int alt, int ret)
+{
+}
+
+static inline void trace_usb_clear_device_feature(int addr, int feature, int ret)
+{
+}
+
+static inline void trace_usb_set_device_feature(int addr, int feature, int ret)
+{
+}
+
+static inline void trace_usb_hub_reset(int addr)
+{
+}
+
+static inline void trace_usb_hub_control(int addr, int request, int value, int index, int length)
+{
+}
+
+static inline void trace_usb_hub_get_port_status(int addr, int nr, int status, int changed)
+{
+}
+
+static inline void trace_usb_hub_set_port_feature(int addr, int nr, const char * f)
+{
+}
+
+static inline void trace_usb_hub_clear_port_feature(int addr, int nr, const char * f)
+{
+}
+
+static inline void trace_usb_hub_attach(int addr, int nr)
+{
+}
+
+static inline void trace_usb_hub_detach(int addr, int nr)
+{
+}
+
+static inline void trace_usb_uas_reset(int addr)
+{
+}
+
+static inline void trace_usb_uas_command(int addr, uint16_t tag, int lun, uint32_t lun64_1, uint32_t lun64_2)
+{
+}
+
+static inline void trace_usb_uas_response(int addr, uint16_t tag, uint8_t code)
+{
+}
+
+static inline void trace_usb_uas_sense(int addr, uint16_t tag, uint8_t status)
+{
+}
+
+static inline void trace_usb_uas_read_ready(int addr, uint16_t tag)
+{
+}
+
+static inline void trace_usb_uas_write_ready(int addr, uint16_t tag)
+{
+}
+
+static inline void trace_usb_uas_xfer_data(int addr, uint16_t tag, uint32_t copy, uint32_t uoff, uint32_t usize, uint32_t soff, uint32_t ssize)
+{
+}
+
+static inline void trace_usb_uas_scsi_data(int addr, uint16_t tag, uint32_t bytes)
+{
+}
+
+static inline void trace_usb_uas_scsi_complete(int addr, uint16_t tag, uint32_t status, uint32_t resid)
+{
+}
+
+static inline void trace_usb_uas_tmf_abort_task(int addr, uint16_t tag, uint16_t task_tag)
+{
+}
+
+static inline void trace_usb_uas_tmf_logical_unit_reset(int addr, uint16_t tag, int lun)
+{
+}
+
+static inline void trace_usb_uas_tmf_unsupported(int addr, uint16_t tag, uint32_t function)
+{
+}
+
+static inline void trace_usb_host_open_started(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_open_success(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_open_failure(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_disconnect(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_close(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_attach_kernel(int bus, int addr, int interface)
+{
+}
+
+static inline void trace_usb_host_detach_kernel(int bus, int addr, int interface)
+{
+}
+
+static inline void trace_usb_host_set_address(int bus, int addr, int config)
+{
+}
+
+static inline void trace_usb_host_set_config(int bus, int addr, int config)
+{
+}
+
+static inline void trace_usb_host_set_interface(int bus, int addr, int interface, int alt)
+{
+}
+
+static inline void trace_usb_host_claim_interfaces(int bus, int addr, int config, int nif)
+{
+}
+
+static inline void trace_usb_host_claim_interface(int bus, int addr, int config, int interface)
+{
+}
+
+static inline void trace_usb_host_release_interfaces(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_release_interface(int bus, int addr, int interface)
+{
+}
+
+static inline void trace_usb_host_req_control(int bus, int addr, void * p, int req, int value, int index)
+{
+}
+
+static inline void trace_usb_host_req_data(int bus, int addr, void * p, int in, int ep, int size)
+{
+}
+
+static inline void trace_usb_host_req_complete(int bus, int addr, void * p, int status, int length)
+{
+}
+
+static inline void trace_usb_host_req_emulated(int bus, int addr, void * p, int status)
+{
+}
+
+static inline void trace_usb_host_req_canceled(int bus, int addr, void * p)
+{
+}
+
+static inline void trace_usb_host_urb_submit(int bus, int addr, void * aurb, int length, int more)
+{
+}
+
+static inline void trace_usb_host_urb_complete(int bus, int addr, void * aurb, int status, int length, int more)
+{
+}
+
+static inline void trace_usb_host_urb_canceled(int bus, int addr, void * aurb)
+{
+}
+
+static inline void trace_usb_host_ep_set_halt(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_ep_clear_halt(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_start(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_stop(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_out_of_bufs(int bus, int addr, int ep)
+{
+}
+
+static inline void trace_usb_host_iso_many_urbs(int bus, int addr, int count)
+{
+}
+
+static inline void trace_usb_host_reset(int bus, int addr)
+{
+}
+
+static inline void trace_usb_host_auto_scan_enabled(void)
+{
+}
+
+static inline void trace_usb_host_auto_scan_disabled(void)
+{
+}
+
+static inline void trace_usb_host_claim_port(int bus, int hub, int port)
+{
+}
+
+static inline void trace_usb_host_parse_device(int bus, int addr, int vendor, int product)
+{
+}
+
+static inline void trace_usb_host_parse_config(int bus, int addr, int value, int active)
+{
+}
+
+static inline void trace_usb_host_parse_interface(int bus, int addr, int num, int alt, int active)
+{
+}
+
+static inline void trace_usb_host_parse_endpoint(int bus, int addr, int ep, const char * dir, const char * type, int active)
+{
+}
+
+static inline void trace_usb_host_parse_unknown(int bus, int addr, int len, int type)
+{
+}
+
+static inline void trace_usb_host_parse_error(int bus, int addr, const char * errmsg)
+{
+}
+
+static inline void trace_scsi_req_alloc(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_cancel(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_data(int target, int lun, int tag, int len)
+{
+}
+
+static inline void trace_scsi_req_data_canceled(int target, int lun, int tag, int len)
+{
+}
+
+static inline void trace_scsi_req_dequeue(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_continue(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_continue_canceled(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_req_parsed(int target, int lun, int tag, int cmd, int mode, int xfer)
+{
+}
+
+static inline void trace_scsi_req_parsed_lba(int target, int lun, int tag, int cmd, uint64_t lba)
+{
+}
+
+static inline void trace_scsi_req_parse_bad(int target, int lun, int tag, int cmd)
+{
+}
+
+static inline void trace_scsi_req_build_sense(int target, int lun, int tag, int key, int asc, int ascq)
+{
+}
+
+static inline void trace_scsi_device_set_ua(int target, int lun, int key, int asc, int ascq)
+{
+}
+
+static inline void trace_scsi_report_luns(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_inquiry(int target, int lun, int tag, int cdb1, int cdb2)
+{
+}
+
+static inline void trace_scsi_test_unit_ready(int target, int lun, int tag)
+{
+}
+
+static inline void trace_scsi_request_sense(int target, int lun, int tag)
+{
+}
+
+static inline void trace_vm_state_notify(int running, int reason)
+{
+}
+
+static inline void trace_load_file(const char * name, const char * path)
+{
+}
+
+static inline void trace_runstate_set(int new_state)
+{
+}
+
+static inline void trace_qcow2_writev_start_req(void * co, int64_t sector, int nb_sectors)
+{
+}
+
+static inline void trace_qcow2_writev_done_req(void * co, int ret)
+{
+}
+
+static inline void trace_qcow2_writev_start_part(void * co)
+{
+}
+
+static inline void trace_qcow2_writev_done_part(void * co, int cur_nr_sectors)
+{
+}
+
+static inline void trace_qcow2_writev_data(void * co, uint64_t offset)
+{
+}
+
+static inline void trace_qcow2_alloc_clusters_offset(void * co, uint64_t offset, int n_start, int n_end)
+{
+}
+
+static inline void trace_qcow2_handle_copied(void * co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes)
+{
+}
+
+static inline void trace_qcow2_handle_alloc(void * co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes)
+{
+}
+
+static inline void trace_qcow2_do_alloc_clusters_offset(void * co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters)
+{
+}
+
+static inline void trace_qcow2_cluster_alloc_phys(void * co)
+{
+}
+
+static inline void trace_qcow2_cluster_link_l2(void * co, int nb_clusters)
+{
+}
+
+static inline void trace_qcow2_l2_allocate(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_get_empty(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_write_l2(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_write_l1(void * bs, int l1_index)
+{
+}
+
+static inline void trace_qcow2_l2_allocate_done(void * bs, int l1_index, int ret)
+{
+}
+
+static inline void trace_qcow2_cache_get(void * co, int c, uint64_t offset, bool read_from_disk)
+{
+}
+
+static inline void trace_qcow2_cache_get_replace_entry(void * co, int c, int i)
+{
+}
+
+static inline void trace_qcow2_cache_get_read(void * co, int c, int i)
+{
+}
+
+static inline void trace_qcow2_cache_get_done(void * co, int c, int i)
+{
+}
+
+static inline void trace_qcow2_cache_flush(void * co, int c)
+{
+}
+
+static inline void trace_qcow2_cache_entry_flush(void * co, int c, int i)
+{
+}
+
+static inline void trace_qed_alloc_l2_cache_entry(void * l2_cache, void * entry)
+{
+}
+
+static inline void trace_qed_unref_l2_cache_entry(void * entry, int ref)
+{
+}
+
+static inline void trace_qed_find_l2_cache_entry(void * l2_cache, void * entry, uint64_t offset, int ref)
+{
+}
+
+static inline void trace_qed_read_table(void * s, uint64_t offset, void * table)
+{
+}
+
+static inline void trace_qed_read_table_cb(void * s, void * table, int ret)
+{
+}
+
+static inline void trace_qed_write_table(void * s, uint64_t offset, void * table, unsigned int index, unsigned int n)
+{
+}
+
+static inline void trace_qed_write_table_cb(void * s, void * table, int flush, int ret)
+{
+}
+
+static inline void trace_qed_need_check_timer_cb(void * s)
+{
+}
+
+static inline void trace_qed_start_need_check_timer(void * s)
+{
+}
+
+static inline void trace_qed_cancel_need_check_timer(void * s)
+{
+}
+
+static inline void trace_qed_aio_complete(void * s, void * acb, int ret)
+{
+}
+
+static inline void trace_qed_aio_setup(void * s, void * acb, int64_t sector_num, int nb_sectors, void * opaque, int flags)
+{
+}
+
+static inline void trace_qed_aio_next_io(void * s, void * acb, int ret, uint64_t cur_pos)
+{
+}
+
+static inline void trace_qed_aio_read_data(void * s, void * acb, int ret, uint64_t offset, size_t len)
+{
+}
+
+static inline void trace_qed_aio_write_data(void * s, void * acb, int ret, uint64_t offset, size_t len)
+{
+}
+
+static inline void trace_qed_aio_write_prefill(void * s, void * acb, uint64_t start, size_t len, uint64_t offset)
+{
+}
+
+static inline void trace_qed_aio_write_postfill(void * s, void * acb, uint64_t start, size_t len, uint64_t offset)
+{
+}
+
+static inline void trace_qed_aio_write_main(void * s, void * acb, int ret, uint64_t offset, size_t len)
+{
+}
+
+static inline void trace_g364fb_read(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_g364fb_write(uint64_t addr, uint32_t new)
+{
+}
+
+static inline void trace_grlib_gptimer_enable(int id, uint32_t count)
+{
+}
+
+static inline void trace_grlib_gptimer_disabled(int id, uint32_t config)
+{
+}
+
+static inline void trace_grlib_gptimer_restart(int id, uint32_t reload)
+{
+}
+
+static inline void trace_grlib_gptimer_set_scaler(uint32_t scaler, uint32_t freq)
+{
+}
+
+static inline void trace_grlib_gptimer_hit(int id)
+{
+}
+
+static inline void trace_grlib_gptimer_readl(int id, uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_grlib_gptimer_writel(int id, uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_grlib_irqmp_check_irqs(uint32_t pend, uint32_t force, uint32_t mask, uint32_t lvl1, uint32_t lvl2)
+{
+}
+
+static inline void trace_grlib_irqmp_ack(int intno)
+{
+}
+
+static inline void trace_grlib_irqmp_set_irq(int irq)
+{
+}
+
+static inline void trace_grlib_irqmp_readl_unknown(uint64_t addr)
+{
+}
+
+static inline void trace_grlib_irqmp_writel_unknown(uint64_t addr, uint32_t value)
+{
+}
+
+static inline void trace_grlib_apbuart_event(int event)
+{
+}
+
+static inline void trace_grlib_apbuart_writel_unknown(uint64_t addr, uint32_t value)
+{
+}
+
+static inline void trace_grlib_apbuart_readl_unknown(uint64_t addr)
+{
+}
+
+static inline void trace_leon3_set_irq(int intno)
+{
+}
+
+static inline void trace_leon3_reset_irq(int intno)
+{
+}
+
+static inline void trace_spice_vmc_write(ssize_t out, int len)
+{
+}
+
+static inline void trace_spice_vmc_read(int bytes, int len)
+{
+}
+
+static inline void trace_spice_vmc_register_interface(void * scd)
+{
+}
+
+static inline void trace_spice_vmc_unregister_interface(void * scd)
+{
+}
+
+static inline void trace_spice_vmc_event(int event)
+{
+}
+
+static inline void trace_lm32_pic_raise_irq(void)
+{
+}
+
+static inline void trace_lm32_pic_lower_irq(void)
+{
+}
+
+static inline void trace_lm32_pic_interrupt(int irq, int level)
+{
+}
+
+static inline void trace_lm32_pic_set_im(uint32_t im)
+{
+}
+
+static inline void trace_lm32_pic_set_ip(uint32_t ip)
+{
+}
+
+static inline void trace_lm32_pic_get_im(uint32_t im)
+{
+}
+
+static inline void trace_lm32_pic_get_ip(uint32_t ip)
+{
+}
+
+static inline void trace_lm32_juart_get_jtx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_juart_set_jtx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_juart_get_jrx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_juart_set_jrx(uint32_t value)
+{
+}
+
+static inline void trace_lm32_timer_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_timer_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_timer_hit(void)
+{
+}
+
+static inline void trace_lm32_timer_irq_state(int level)
+{
+}
+
+static inline void trace_lm32_uart_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_uart_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_lm32_uart_irq_state(int level)
+{
+}
+
+static inline void trace_lm32_sys_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_megasas_init_firmware(uint64_t pa)
+{
+}
+
+static inline void trace_megasas_init_queue(uint64_t queue_pa, int queue_len, uint64_t head, uint64_t tail, uint32_t flags)
+{
+}
+
+static inline void trace_megasas_initq_map_failed(int frame)
+{
+}
+
+static inline void trace_megasas_initq_mismatch(int queue_len, int fw_cmds)
+{
+}
+
+static inline void trace_megasas_qf_found(unsigned int index, uint64_t pa)
+{
+}
+
+static inline void trace_megasas_qf_new(unsigned int index, void * cmd)
+{
+}
+
+static inline void trace_megasas_qf_failed(unsigned long pa)
+{
+}
+
+static inline void trace_megasas_qf_enqueue(unsigned int index, unsigned int count, uint64_t context, unsigned int tail, int busy)
+{
+}
+
+static inline void trace_megasas_qf_update(unsigned int head, unsigned int busy)
+{
+}
+
+static inline void trace_megasas_qf_dequeue(unsigned int index)
+{
+}
+
+static inline void trace_megasas_qf_map_failed(int cmd, unsigned long frame)
+{
+}
+
+static inline void trace_megasas_qf_complete_noirq(uint64_t context)
+{
+}
+
+static inline void trace_megasas_qf_complete(uint64_t context, unsigned int tail, unsigned int offset, int busy, unsigned int doorbell)
+{
+}
+
+static inline void trace_megasas_handle_frame(const char * cmd, uint64_t addr, uint64_t context, uint32_t count)
+{
+}
+
+static inline void trace_megasas_frame_busy(uint64_t addr)
+{
+}
+
+static inline void trace_megasas_unhandled_frame_cmd(int cmd, uint8_t frame_cmd)
+{
+}
+
+static inline void trace_megasas_handle_scsi(const char * frame, int bus, int dev, int lun, void * sdev, unsigned long size)
+{
+}
+
+static inline void trace_megasas_scsi_target_not_present(const char * frame, int bus, int dev, int lun)
+{
+}
+
+static inline void trace_megasas_scsi_invalid_cdb_len(const char * frame, int bus, int dev, int lun, int len)
+{
+}
+
+static inline void trace_megasas_iov_read_overflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_iov_write_overflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_iov_read_underflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_iov_write_underflow(int cmd, int bytes, int len)
+{
+}
+
+static inline void trace_megasas_scsi_req_alloc_failed(const char * frame, int dev, int lun)
+{
+}
+
+static inline void trace_megasas_scsi_read_start(int cmd, int len)
+{
+}
+
+static inline void trace_megasas_scsi_write_start(int cmd, int len)
+{
+}
+
+static inline void trace_megasas_scsi_nodata(int cmd)
+{
+}
+
+static inline void trace_megasas_scsi_complete(int cmd, uint32_t status, int len, int xfer)
+{
+}
+
+static inline void trace_megasas_command_complete(int cmd, uint32_t status, uint32_t resid)
+{
+}
+
+static inline void trace_megasas_handle_io(int cmd, const char * frame, int dev, int lun, unsigned long lba, unsigned long count)
+{
+}
+
+static inline void trace_megasas_io_target_not_present(int cmd, const char * frame, int dev, int lun)
+{
+}
+
+static inline void trace_megasas_io_read_start(int cmd, unsigned long lba, unsigned long count, unsigned long len)
+{
+}
+
+static inline void trace_megasas_io_write_start(int cmd, unsigned long lba, unsigned long count, unsigned long len)
+{
+}
+
+static inline void trace_megasas_io_complete(int cmd, uint32_t len)
+{
+}
+
+static inline void trace_megasas_io_read(int cmd, int bytes, int len, unsigned long offset)
+{
+}
+
+static inline void trace_megasas_io_write(int cmd, int bytes, int len, unsigned long offset)
+{
+}
+
+static inline void trace_megasas_io_continue(int cmd, int bytes)
+{
+}
+
+static inline void trace_megasas_iovec_map_failed(int cmd, int index, unsigned long iov_size)
+{
+}
+
+static inline void trace_megasas_iovec_sgl_overflow(int cmd, int index, int limit)
+{
+}
+
+static inline void trace_megasas_iovec_sgl_underflow(int cmd, int index)
+{
+}
+
+static inline void trace_megasas_iovec_sgl_invalid(int cmd, int index, uint64_t pa, uint32_t len)
+{
+}
+
+static inline void trace_megasas_iovec_overflow(int cmd, int len, int limit)
+{
+}
+
+static inline void trace_megasas_iovec_underflow(int cmd, int len, int limit)
+{
+}
+
+static inline void trace_megasas_handle_dcmd(int cmd, int opcode)
+{
+}
+
+static inline void trace_megasas_finish_dcmd(int cmd, int size)
+{
+}
+
+static inline void trace_megasas_dcmd_req_alloc_failed(int cmd, const char * desc)
+{
+}
+
+static inline void trace_megasas_dcmd_internal_submit(int cmd, const char * desc, int dev)
+{
+}
+
+static inline void trace_megasas_dcmd_internal_finish(int cmd, int opcode, int lun)
+{
+}
+
+static inline void trace_megasas_dcmd_internal_invalid(int cmd, int opcode)
+{
+}
+
+static inline void trace_megasas_dcmd_unhandled(int cmd, int opcode, int len)
+{
+}
+
+static inline void trace_megasas_dcmd_zero_sge(int cmd)
+{
+}
+
+static inline void trace_megasas_dcmd_invalid_sge(int cmd, int count)
+{
+}
+
+static inline void trace_megasas_dcmd_map_failed(int cmd)
+{
+}
+
+static inline void trace_megasas_dcmd_invalid_xfer_len(int cmd, unsigned long size, unsigned long max)
+{
+}
+
+static inline void trace_megasas_dcmd_enter(int cmd, const char * dcmd, int len)
+{
+}
+
+static inline void trace_megasas_dcmd_dummy(int cmd, unsigned long size)
+{
+}
+
+static inline void trace_megasas_dcmd_set_fw_time(int cmd, unsigned long time)
+{
+}
+
+static inline void trace_megasas_dcmd_pd_get_list(int cmd, int num, int max, int offset)
+{
+}
+
+static inline void trace_megasas_dcmd_ld_get_list(int cmd, int num, int max)
+{
+}
+
+static inline void trace_megasas_dcmd_ld_get_info(int cmd, int ld_id)
+{
+}
+
+static inline void trace_megasas_dcmd_pd_get_info(int cmd, int pd_id)
+{
+}
+
+static inline void trace_megasas_dcmd_pd_list_query(int cmd, int flags)
+{
+}
+
+static inline void trace_megasas_dcmd_unsupported(int cmd, unsigned long size)
+{
+}
+
+static inline void trace_megasas_abort_frame(int cmd, int abort_cmd)
+{
+}
+
+static inline void trace_megasas_abort_no_cmd(int cmd, uint64_t context)
+{
+}
+
+static inline void trace_megasas_abort_invalid_context(int cmd, uint64_t context, int abort_cmd)
+{
+}
+
+static inline void trace_megasas_reset(void)
+{
+}
+
+static inline void trace_megasas_init(int sges, int cmds, const char * intr, const char * mode)
+{
+}
+
+static inline void trace_megasas_msix_raise(int vector)
+{
+}
+
+static inline void trace_megasas_irq_lower(void)
+{
+}
+
+static inline void trace_megasas_irq_raise(void)
+{
+}
+
+static inline void trace_megasas_intr_enabled(void)
+{
+}
+
+static inline void trace_megasas_intr_disabled(void)
+{
+}
+
+static inline void trace_megasas_mmio_readl(unsigned long addr, uint32_t val)
+{
+}
+
+static inline void trace_megasas_mmio_invalid_readl(unsigned long addr)
+{
+}
+
+static inline void trace_megasas_mmio_writel(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_megasas_mmio_invalid_writel(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_milkymist_ac97_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_ac97_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_crrequest(void)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_crreply(void)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_dmaw(void)
+{
+}
+
+static inline void trace_milkymist_ac97_pulse_irq_dmar(void)
+{
+}
+
+static inline void trace_milkymist_ac97_in_cb(int avail, uint32_t remaining)
+{
+}
+
+static inline void trace_milkymist_ac97_in_cb_transferred(int transferred)
+{
+}
+
+static inline void trace_milkymist_ac97_out_cb(int free, uint32_t remaining)
+{
+}
+
+static inline void trace_milkymist_ac97_out_cb_transferred(int transferred)
+{
+}
+
+static inline void trace_milkymist_hpdmc_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_hpdmc_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_memcard_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_memcard_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_mdio_write(uint8_t phy_addr, uint8_t addr, uint16_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_mdio_read(uint8_t phy_addr, uint8_t addr, uint16_t value)
+{
+}
+
+static inline void trace_milkymist_minimac2_tx_frame(uint32_t length)
+{
+}
+
+static inline void trace_milkymist_minimac2_rx_frame(const void * buf, uint32_t length)
+{
+}
+
+static inline void trace_milkymist_minimac2_drop_rx_frame(const void * buf)
+{
+}
+
+static inline void trace_milkymist_minimac2_rx_transfer(const void * buf, uint32_t length)
+{
+}
+
+static inline void trace_milkymist_minimac2_raise_irq_rx(void)
+{
+}
+
+static inline void trace_milkymist_minimac2_lower_irq_rx(void)
+{
+}
+
+static inline void trace_milkymist_minimac2_pulse_irq_tx(void)
+{
+}
+
+static inline void trace_milkymist_pfpu_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_pfpu_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_pfpu_vectout(uint32_t a, uint32_t b, uint32_t dma_ptr)
+{
+}
+
+static inline void trace_milkymist_pfpu_pulse_irq(void)
+{
+}
+
+static inline void trace_milkymist_softusb_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_softusb_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_softusb_mevt(uint8_t m)
+{
+}
+
+static inline void trace_milkymist_softusb_kevt(uint8_t m)
+{
+}
+
+static inline void trace_milkymist_softusb_mouse_event(int dx, int dy, int dz, int bs)
+{
+}
+
+static inline void trace_milkymist_softusb_pulse_irq(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_sysctl_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_sysctl_icap_write(uint32_t value)
+{
+}
+
+static inline void trace_milkymist_sysctl_start_timer0(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_stop_timer0(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_start_timer1(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_stop_timer1(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_pulse_irq_timer0(void)
+{
+}
+
+static inline void trace_milkymist_sysctl_pulse_irq_timer1(void)
+{
+}
+
+static inline void trace_milkymist_tmu2_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_tmu2_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_tmu2_start(void)
+{
+}
+
+static inline void trace_milkymist_tmu2_pulse_irq(void)
+{
+}
+
+static inline void trace_milkymist_uart_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_uart_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_uart_raise_irq(void)
+{
+}
+
+static inline void trace_milkymist_uart_lower_irq(void)
+{
+}
+
+static inline void trace_milkymist_vgafb_memory_read(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_milkymist_vgafb_memory_write(uint32_t addr, uint32_t value)
+{
+}
+
+static inline void trace_mipsnet_send(uint32_t size)
+{
+}
+
+static inline void trace_mipsnet_receive(uint32_t size)
+{
+}
+
+static inline void trace_mipsnet_read(uint64_t addr, uint32_t val)
+{
+}
+
+static inline void trace_mipsnet_write(uint64_t addr, uint64_t val)
+{
+}
+
+static inline void trace_mipsnet_irq(uint32_t isr, uint32_t intctl)
+{
+}
+
+static inline void trace_pc87312_io_read(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_pc87312_io_write(uint32_t addr, uint32_t val)
+{
+}
+
+static inline void trace_pc87312_info_floppy(uint32_t base)
+{
+}
+
+static inline void trace_pc87312_info_ide(uint32_t base)
+{
+}
+
+static inline void trace_pc87312_info_parallel(uint32_t base, uint32_t irq)
+{
+}
+
+static inline void trace_pc87312_info_serial(int n, uint32_t base, uint32_t irq)
+{
+}
+
+static inline void trace_pvscsi_ring_init_data(uint32_t txr_len_log2, uint32_t rxr_len_log2)
+{
+}
+
+static inline void trace_pvscsi_ring_init_msg(uint32_t len_log2)
+{
+}
+
+static inline void trace_pvscsi_ring_flush_cmp(uint64_t filled_cmp_ptr)
+{
+}
+
+static inline void trace_pvscsi_ring_flush_msg(uint64_t filled_cmp_ptr)
+{
+}
+
+static inline void trace_pvscsi_update_irq_level(bool raise, uint64_t mask, uint64_t status)
+{
+}
+
+static inline void trace_pvscsi_update_irq_msi(void)
+{
+}
+
+static inline void trace_pvscsi_cmp_ring_put(unsigned long addr)
+{
+}
+
+static inline void trace_pvscsi_msg_ring_put(unsigned long addr)
+{
+}
+
+static inline void trace_pvscsi_complete_request(uint64_t context, uint64_t len, uint8_t sense_key)
+{
+}
+
+static inline void trace_pvscsi_get_sg_list(int nsg, size_t size)
+{
+}
+
+static inline void trace_pvscsi_get_next_sg_elem(uint32_t flags)
+{
+}
+
+static inline void trace_pvscsi_command_complete_not_found(uint32_t tag)
+{
+}
+
+static inline void trace_pvscsi_command_complete_data_run(void)
+{
+}
+
+static inline void trace_pvscsi_command_complete_sense_len(int len)
+{
+}
+
+static inline void trace_pvscsi_convert_sglist(uint64_t context, unsigned long addr, uint32_t resid)
+{
+}
+
+static inline void trace_pvscsi_process_req_descr(uint8_t cmd, uint64_t ctx)
+{
+}
+
+static inline void trace_pvscsi_process_req_descr_unknown_device(void)
+{
+}
+
+static inline void trace_pvscsi_process_req_descr_invalid_dir(void)
+{
+}
+
+static inline void trace_pvscsi_process_io(unsigned long addr)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_noimpl(const char* cmd)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_reset_dev(uint32_t tgt, int lun, void* dev)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_arrived(const char* cmd)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_abort(uint64_t ctx, uint32_t tgt)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_unknown(uint64_t cmd_id)
+{
+}
+
+static inline void trace_pvscsi_on_cmd_unknown_data(uint32_t data)
+{
+}
+
+static inline void trace_pvscsi_io_write(const char* cmd, uint64_t val)
+{
+}
+
+static inline void trace_pvscsi_io_write_unknown(unsigned long addr, unsigned sz, uint64_t val)
+{
+}
+
+static inline void trace_pvscsi_io_read(const char* cmd, uint64_t status)
+{
+}
+
+static inline void trace_pvscsi_io_read_unknown(unsigned long addr, unsigned sz)
+{
+}
+
+static inline void trace_pvscsi_init_msi_fail(int res)
+{
+}
+
+static inline void trace_pvscsi_state(const char* state)
+{
+}
+
+static inline void trace_pvscsi_tx_rings_ppn(const char* label, uint64_t ppn)
+{
+}
+
+static inline void trace_pvscsi_tx_rings_num_pages(const char* label, uint32_t num)
+{
+}
+
+static inline void trace_xen_ram_alloc(unsigned long ram_addr, unsigned long size)
+{
+}
+
+static inline void trace_xen_client_set_memory(uint64_t start_addr, unsigned long size, bool log_dirty)
+{
+}
+
+static inline void trace_xen_map_cache(uint64_t phys_addr)
+{
+}
+
+static inline void trace_xen_remap_bucket(uint64_t index)
+{
+}
+
+static inline void trace_xen_map_cache_return(void* ptr)
+{
+}
+
+static inline void trace_xen_map_block(uint64_t phys_addr, uint64_t size)
+{
+}
+
+static inline void trace_xen_unmap_block(void* addr, unsigned long size)
+{
+}
+
+static inline void trace_xen_platform_log(char * s)
+{
+}
+
+static inline void trace_qemu_coroutine_enter(void * from, void * to, void * opaque)
+{
+}
+
+static inline void trace_qemu_coroutine_yield(void * from, void * to)
+{
+}
+
+static inline void trace_qemu_coroutine_terminate(void * co)
+{
+}
+
+static inline void trace_qemu_co_queue_run_restart(void * co)
+{
+}
+
+static inline void trace_qemu_co_queue_next(void * nxt)
+{
+}
+
+static inline void trace_qemu_co_mutex_lock_entry(void * mutex, void * self)
+{
+}
+
+static inline void trace_qemu_co_mutex_lock_return(void * mutex, void * self)
+{
+}
+
+static inline void trace_qemu_co_mutex_unlock_entry(void * mutex, void * self)
+{
+}
+
+static inline void trace_qemu_co_mutex_unlock_return(void * mutex, void * self)
+{
+}
+
+static inline void trace_escc_put_queue(char channel, int b)
+{
+}
+
+static inline void trace_escc_get_queue(char channel, int val)
+{
+}
+
+static inline void trace_escc_update_irq(int irq)
+{
+}
+
+static inline void trace_escc_update_parameters(char channel, int speed, int parity, int data_bits, int stop_bits)
+{
+}
+
+static inline void trace_escc_mem_writeb_ctrl(char channel, uint32_t reg, uint32_t val)
+{
+}
+
+static inline void trace_escc_mem_writeb_data(char channel, uint32_t val)
+{
+}
+
+static inline void trace_escc_mem_readb_ctrl(char channel, uint32_t reg, uint8_t val)
+{
+}
+
+static inline void trace_escc_mem_readb_data(char channel, uint32_t ret)
+{
+}
+
+static inline void trace_escc_serial_receive_byte(char channel, int ch)
+{
+}
+
+static inline void trace_escc_sunkbd_event_in(int ch)
+{
+}
+
+static inline void trace_escc_sunkbd_event_out(int ch)
+{
+}
+
+static inline void trace_escc_kbd_command(int val)
+{
+}
+
+static inline void trace_escc_sunmouse_event(int dx, int dy, int buttons_state)
+{
+}
+
+static inline void trace_iscsi_aio_write16_cb(void * iscsi, int status, void * acb, int canceled)
+{
+}
+
+static inline void trace_iscsi_aio_writev(void * iscsi, int64_t sector_num, int nb_sectors, void * opaque, void * acb)
+{
+}
+
+static inline void trace_iscsi_aio_read16_cb(void * iscsi, int status, void * acb, int canceled)
+{
+}
+
+static inline void trace_iscsi_aio_readv(void * iscsi, int64_t sector_num, int nb_sectors, void * opaque, void * acb)
+{
+}
+
+static inline void trace_esp_error_fifo_overrun(void)
+{
+}
+
+static inline void trace_esp_error_unhandled_command(uint32_t val)
+{
+}
+
+static inline void trace_esp_error_invalid_write(uint32_t val, uint32_t addr)
+{
+}
+
+static inline void trace_esp_raise_irq(void)
+{
+}
+
+static inline void trace_esp_lower_irq(void)
+{
+}
+
+static inline void trace_esp_dma_enable(void)
+{
+}
+
+static inline void trace_esp_dma_disable(void)
+{
+}
+
+static inline void trace_esp_get_cmd(uint32_t dmalen, int target)
+{
+}
+
+static inline void trace_esp_do_busid_cmd(uint8_t busid)
+{
+}
+
+static inline void trace_esp_handle_satn_stop(uint32_t cmdlen)
+{
+}
+
+static inline void trace_esp_write_response(uint32_t status)
+{
+}
+
+static inline void trace_esp_do_dma(uint32_t cmdlen, uint32_t len)
+{
+}
+
+static inline void trace_esp_command_complete(void)
+{
+}
+
+static inline void trace_esp_command_complete_unexpected(void)
+{
+}
+
+static inline void trace_esp_command_complete_fail(void)
+{
+}
+
+static inline void trace_esp_transfer_data(uint32_t dma_left, int32_t ti_size)
+{
+}
+
+static inline void trace_esp_handle_ti(uint32_t minlen)
+{
+}
+
+static inline void trace_esp_handle_ti_cmd(uint32_t cmdlen)
+{
+}
+
+static inline void trace_esp_mem_readb(uint32_t saddr, uint8_t reg)
+{
+}
+
+static inline void trace_esp_mem_writeb(uint32_t saddr, uint8_t reg, uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_nop(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_flush(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_reset(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_bus_reset(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_iccs(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_msgacc(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_pad(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_satn(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_rstatn(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_sel(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_selatn(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_selatns(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_ensel(uint32_t val)
+{
+}
+
+static inline void trace_esp_mem_writeb_cmd_dissel(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_dma_direction(void)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_read(uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_write(uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_error_invalid_write_dma(uint32_t val, uint32_t addr)
+{
+}
+
+static inline void trace_esp_pci_dma_read(uint32_t saddr, uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_dma_write(uint32_t saddr, uint32_t reg, uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_idle(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_blast(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_abort(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_dma_start(uint32_t val)
+{
+}
+
+static inline void trace_esp_pci_sbac_read(uint32_t reg)
+{
+}
+
+static inline void trace_esp_pci_sbac_write(uint32_t reg, uint32_t val)
+{
+}
+
+static inline void trace_handle_qmp_command(void * mon, const char * cmd_name)
+{
+}
+
+static inline void trace_monitor_protocol_emitter(void * mon)
+{
+}
+
+static inline void trace_monitor_protocol_event(uint32_t event, const char * evname, void * data)
+{
+}
+
+static inline void trace_monitor_protocol_event_handler(uint32_t event, void * data, uint64_t last, uint64_t now)
+{
+}
+
+static inline void trace_monitor_protocol_event_emit(uint32_t event, void * data)
+{
+}
+
+static inline void trace_monitor_protocol_event_queue(uint32_t event, void * data, uint64_t rate, uint64_t last, uint64_t now)
+{
+}
+
+static inline void trace_monitor_protocol_event_throttle(uint32_t event, uint64_t rate)
+{
+}
+
+static inline void trace_open_eth_mii_write(unsigned idx, uint16_t v)
+{
+}
+
+static inline void trace_open_eth_mii_read(unsigned idx, uint16_t v)
+{
+}
+
+static inline void trace_open_eth_update_irq(uint32_t v)
+{
+}
+
+static inline void trace_open_eth_receive(unsigned len)
+{
+}
+
+static inline void trace_open_eth_receive_mcast(unsigned idx, uint32_t h0, uint32_t h1)
+{
+}
+
+static inline void trace_open_eth_receive_reject(void)
+{
+}
+
+static inline void trace_open_eth_receive_desc(uint32_t addr, uint32_t len_flags)
+{
+}
+
+static inline void trace_open_eth_start_xmit(uint32_t addr, unsigned len, unsigned tx_len)
+{
+}
+
+static inline void trace_open_eth_reg_read(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_open_eth_reg_write(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_open_eth_desc_read(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_open_eth_desc_write(uint32_t addr, uint32_t v)
+{
+}
+
+static inline void trace_v9fs_rerror(uint16_t tag, uint8_t id, int err)
+{
+}
+
+static inline void trace_v9fs_version(uint16_t tag, uint8_t id, int32_t msize, char* version)
+{
+}
+
+static inline void trace_v9fs_version_return(uint16_t tag, uint8_t id, int32_t msize, char* version)
+{
+}
+
+static inline void trace_v9fs_attach(uint16_t tag, uint8_t id, int32_t fid, int32_t afid, char* uname, char* aname)
+{
+}
+
+static inline void trace_v9fs_attach_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path)
+{
+}
+
+static inline void trace_v9fs_stat(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_stat_return(uint16_t tag, uint8_t id, int32_t mode, int32_t atime, int32_t mtime, int64_t length)
+{
+}
+
+static inline void trace_v9fs_getattr(uint16_t tag, uint8_t id, int32_t fid, uint64_t request_mask)
+{
+}
+
+static inline void trace_v9fs_getattr_return(uint16_t tag, uint8_t id, uint64_t result_mask, uint32_t mode, uint32_t uid, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_walk(uint16_t tag, uint8_t id, int32_t fid, int32_t newfid, uint16_t nwnames)
+{
+}
+
+static inline void trace_v9fs_walk_return(uint16_t tag, uint8_t id, uint16_t nwnames, void* qids)
+{
+}
+
+static inline void trace_v9fs_open(uint16_t tag, uint8_t id, int32_t fid, int32_t mode)
+{
+}
+
+static inline void trace_v9fs_open_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int iounit)
+{
+}
+
+static inline void trace_v9fs_lcreate(uint16_t tag, uint8_t id, int32_t dfid, int32_t flags, int32_t mode, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_lcreate_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int32_t iounit)
+{
+}
+
+static inline void trace_v9fs_fsync(uint16_t tag, uint8_t id, int32_t fid, int datasync)
+{
+}
+
+static inline void trace_v9fs_clunk(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_read(uint16_t tag, uint8_t id, int32_t fid, uint64_t off, uint32_t max_count)
+{
+}
+
+static inline void trace_v9fs_read_return(uint16_t tag, uint8_t id, int32_t count, ssize_t err)
+{
+}
+
+static inline void trace_v9fs_readdir(uint16_t tag, uint8_t id, int32_t fid, uint64_t offset, uint32_t max_count)
+{
+}
+
+static inline void trace_v9fs_readdir_return(uint16_t tag, uint8_t id, uint32_t count, ssize_t retval)
+{
+}
+
+static inline void trace_v9fs_write(uint16_t tag, uint8_t id, int32_t fid, uint64_t off, uint32_t count, int cnt)
+{
+}
+
+static inline void trace_v9fs_write_return(uint16_t tag, uint8_t id, int32_t total, ssize_t err)
+{
+}
+
+static inline void trace_v9fs_create(uint16_t tag, uint8_t id, int32_t fid, char* name, int32_t perm, int8_t mode)
+{
+}
+
+static inline void trace_v9fs_create_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int iounit)
+{
+}
+
+static inline void trace_v9fs_symlink(uint16_t tag, uint8_t id, int32_t fid, char* name, char* symname, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_symlink_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path)
+{
+}
+
+static inline void trace_v9fs_flush(uint16_t tag, uint8_t id, int16_t flush_tag)
+{
+}
+
+static inline void trace_v9fs_link(uint16_t tag, uint8_t id, int32_t dfid, int32_t oldfid, char* name)
+{
+}
+
+static inline void trace_v9fs_remove(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_wstat(uint16_t tag, uint8_t id, int32_t fid, int32_t mode, int32_t atime, int32_t mtime)
+{
+}
+
+static inline void trace_v9fs_mknod(uint16_t tag, uint8_t id, int32_t fid, int mode, int major, int minor)
+{
+}
+
+static inline void trace_v9fs_mknod_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path)
+{
+}
+
+static inline void trace_v9fs_lock(uint16_t tag, uint8_t id, int32_t fid, uint8_t type, uint64_t start, uint64_t length)
+{
+}
+
+static inline void trace_v9fs_lock_return(uint16_t tag, uint8_t id, int8_t status)
+{
+}
+
+static inline void trace_v9fs_getlock(uint16_t tag, uint8_t id, int32_t fid, uint8_t type, uint64_t start, uint64_t length)
+{
+}
+
+static inline void trace_v9fs_getlock_return(uint16_t tag, uint8_t id, uint8_t type, uint64_t start, uint64_t length, uint32_t proc_id)
+{
+}
+
+static inline void trace_v9fs_mkdir(uint16_t tag, uint8_t id, int32_t fid, char* name, int mode, uint32_t gid)
+{
+}
+
+static inline void trace_v9fs_mkdir_return(uint16_t tag, uint8_t id, int8_t type, int32_t version, int64_t path, int err)
+{
+}
+
+static inline void trace_v9fs_xattrwalk(uint16_t tag, uint8_t id, int32_t fid, int32_t newfid, char* name)
+{
+}
+
+static inline void trace_v9fs_xattrwalk_return(uint16_t tag, uint8_t id, int64_t size)
+{
+}
+
+static inline void trace_v9fs_xattrcreate(uint16_t tag, uint8_t id, int32_t fid, char* name, int64_t size, int flags)
+{
+}
+
+static inline void trace_v9fs_readlink(uint16_t tag, uint8_t id, int32_t fid)
+{
+}
+
+static inline void trace_v9fs_readlink_return(uint16_t tag, uint8_t id, char* target)
+{
+}
+
+static inline void trace_mmu_helper_dfault(uint64_t address, uint64_t context, int mmu_idx, uint32_t tl)
+{
+}
+
+static inline void trace_mmu_helper_dprot(uint64_t address, uint64_t context, int mmu_idx, uint32_t tl)
+{
+}
+
+static inline void trace_mmu_helper_dmiss(uint64_t address, uint64_t context)
+{
+}
+
+static inline void trace_mmu_helper_tfault(uint64_t address, uint64_t context)
+{
+}
+
+static inline void trace_mmu_helper_tmiss(uint64_t address, uint64_t context)
+{
+}
+
+static inline void trace_mmu_helper_get_phys_addr_code(uint32_t tl, int mmu_idx, uint64_t prim_context, uint64_t sec_context, uint64_t address)
+{
+}
+
+static inline void trace_mmu_helper_get_phys_addr_data(uint32_t tl, int mmu_idx, uint64_t prim_context, uint64_t sec_context, uint64_t address)
+{
+}
+
+static inline void trace_mmu_helper_mmu_fault(uint64_t address, uint64_t paddr, int mmu_idx, uint32_t tl, uint64_t prim_context, uint64_t sec_context)
+{
+}
+
+static inline void trace_int_helper_set_softint(uint32_t softint)
+{
+}
+
+static inline void trace_int_helper_clear_softint(uint32_t softint)
+{
+}
+
+static inline void trace_int_helper_write_softint(uint32_t softint)
+{
+}
+
+static inline void trace_int_helper_icache_freeze(void)
+{
+}
+
+static inline void trace_int_helper_dcache_freeze(void)
+{
+}
+
+static inline void trace_win_helper_gregset_error(uint32_t pstate)
+{
+}
+
+static inline void trace_win_helper_switch_pstate(uint32_t pstate_regs, uint32_t new_pstate_regs)
+{
+}
+
+static inline void trace_win_helper_no_switch_pstate(uint32_t new_pstate_regs)
+{
+}
+
+static inline void trace_win_helper_wrpil(uint32_t psrpil, uint32_t new_pil)
+{
+}
+
+static inline void trace_win_helper_done(uint32_t tl)
+{
+}
+
+static inline void trace_win_helper_retry(uint32_t tl)
+{
+}
+
+static inline void trace_dma_bdrv_io(void * dbs, void * bs, int64_t sector_num, bool to_dev)
+{
+}
+
+static inline void trace_dma_aio_cancel(void * dbs)
+{
+}
+
+static inline void trace_dma_complete(void * dbs, int ret, void * cb)
+{
+}
+
+static inline void trace_dma_bdrv_cb(void * dbs, int ret)
+{
+}
+
+static inline void trace_dma_map_wait(void * dbs)
+{
+}
+
+static inline void trace_console_gfx_new(void)
+{
+}
+
+static inline void trace_console_txt_new(int w, int h)
+{
+}
+
+static inline void trace_console_select(int nr)
+{
+}
+
+static inline void trace_console_refresh(int interval)
+{
+}
+
+static inline void trace_displaysurface_create(void * display_surface, int w, int h)
+{
+}
+
+static inline void trace_displaysurface_create_from(void * display_surface, int w, int h, int bpp, int swap)
+{
+}
+
+static inline void trace_displaysurface_free(void * display_surface)
+{
+}
+
+static inline void trace_displaychangelistener_register(void * dcl, const char * name)
+{
+}
+
+static inline void trace_displaychangelistener_unregister(void * dcl, const char * name)
+{
+}
+
+static inline void trace_ppm_save(const char * filename, void * display_surface)
+{
+}
+
+static inline void trace_vmware_value_read(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_value_write(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_palette_read(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_palette_write(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_scratch_read(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_scratch_write(uint32_t index, uint32_t value)
+{
+}
+
+static inline void trace_vmware_setmode(uint32_t w, uint32_t h, uint32_t bpp)
+{
+}
+
+static inline void trace_savevm_section_start(void)
+{
+}
+
+static inline void trace_savevm_section_end(unsigned int section_id)
+{
+}
+
+static inline void trace_migration_bitmap_sync_start(void)
+{
+}
+
+static inline void trace_migration_bitmap_sync_end(uint64_t dirty_pages)
+{
+}
+
+static inline void trace_migration_throttle(void)
+{
+}
+
+static inline void trace_qxl_create_guest_primary(int qid, uint32_t width, uint32_t height, uint64_t mem, uint32_t format, uint32_t position)
+{
+}
+
+static inline void trace_qxl_create_guest_primary_rest(int qid, int32_t stride, uint32_t type, uint32_t flags)
+{
+}
+
+static inline void trace_qxl_destroy_primary(int qid)
+{
+}
+
+static inline void trace_qxl_enter_vga_mode(int qid)
+{
+}
+
+static inline void trace_qxl_exit_vga_mode(int qid)
+{
+}
+
+static inline void trace_qxl_hard_reset(int qid, int64_t loadvm)
+{
+}
+
+static inline void trace_qxl_interface_async_complete_io(int qid, uint32_t current_async, void * cookie)
+{
+}
+
+static inline void trace_qxl_interface_attach_worker(int qid)
+{
+}
+
+static inline void trace_qxl_interface_get_init_info(int qid)
+{
+}
+
+static inline void trace_qxl_interface_set_compression_level(int qid, int64_t level)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete(int qid, uint32_t surface_id, uint32_t dirty_left, uint32_t dirty_right, uint32_t dirty_top, uint32_t dirty_bottom)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete_rest(int qid, uint32_t num_updated_rects)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete_overflow(int qid, int max)
+{
+}
+
+static inline void trace_qxl_interface_update_area_complete_schedule_bh(int qid, uint32_t num_dirty)
+{
+}
+
+static inline void trace_qxl_io_destroy_primary_ignored(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_io_log(int qid, const uint8_t * log_buf)
+{
+}
+
+static inline void trace_qxl_io_read_unexpected(int qid)
+{
+}
+
+static inline void trace_qxl_io_unexpected_vga_mode(int qid, uint64_t addr, uint64_t val, const char * desc)
+{
+}
+
+static inline void trace_qxl_io_write(int qid, const char * mode, uint64_t addr, uint64_t val, unsigned size, int async)
+{
+}
+
+static inline void trace_qxl_memslot_add_guest(int qid, uint32_t slot_id, uint64_t guest_start, uint64_t guest_end)
+{
+}
+
+static inline void trace_qxl_post_load(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_pre_load(int qid)
+{
+}
+
+static inline void trace_qxl_pre_save(int qid)
+{
+}
+
+static inline void trace_qxl_reset_surfaces(int qid)
+{
+}
+
+static inline void trace_qxl_ring_command_check(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_command_get(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_command_req_notification(int qid)
+{
+}
+
+static inline void trace_qxl_ring_cursor_check(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_cursor_get(int qid, const char * mode)
+{
+}
+
+static inline void trace_qxl_ring_cursor_req_notification(int qid)
+{
+}
+
+static inline void trace_qxl_ring_res_push(int qid, const char * mode, uint32_t surface_count, uint32_t free_res, void * last_release, const char * notify)
+{
+}
+
+static inline void trace_qxl_ring_res_push_rest(int qid, uint32_t ring_has, uint32_t ring_size, uint32_t prod, uint32_t cons)
+{
+}
+
+static inline void trace_qxl_ring_res_put(int qid, uint32_t free_res)
+{
+}
+
+static inline void trace_qxl_set_mode(int qid, int modenr, uint32_t x_res, uint32_t y_res, uint32_t bits, uint64_t devmem)
+{
+}
+
+static inline void trace_qxl_soft_reset(int qid)
+{
+}
+
+static inline void trace_qemu_spice_add_memslot(int qid, uint32_t slot_id, unsigned long virt_start, unsigned long virt_end, int async)
+{
+}
+
+static inline void trace_qemu_spice_del_memslot(int qid, uint32_t gid, uint32_t slot_id)
+{
+}
+
+static inline void trace_qemu_spice_create_primary_surface(int qid, uint32_t sid, void * surface, int async)
+{
+}
+
+static inline void trace_qemu_spice_destroy_primary_surface(int qid, uint32_t sid, int async)
+{
+}
+
+static inline void trace_qemu_spice_wakeup(uint32_t qid)
+{
+}
+
+static inline void trace_qemu_spice_start(uint32_t qid)
+{
+}
+
+static inline void trace_qemu_spice_stop(uint32_t qid)
+{
+}
+
+static inline void trace_qemu_spice_create_update(uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surfaces_complete(int qid)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surfaces(int qid, int async)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surface_wait_complete(int qid, uint32_t id)
+{
+}
+
+static inline void trace_qxl_spice_destroy_surface_wait(int qid, uint32_t id, int async)
+{
+}
+
+static inline void trace_qxl_spice_flush_surfaces_async(int qid, uint32_t surface_count, uint32_t num_free_res)
+{
+}
+
+static inline void trace_qxl_spice_monitors_config(int qid)
+{
+}
+
+static inline void trace_qxl_spice_loadvm_commands(int qid, void * ext, uint32_t count)
+{
+}
+
+static inline void trace_qxl_spice_oom(int qid)
+{
+}
+
+static inline void trace_qxl_spice_reset_cursor(int qid)
+{
+}
+
+static inline void trace_qxl_spice_reset_image_cache(int qid)
+{
+}
+
+static inline void trace_qxl_spice_reset_memslots(int qid)
+{
+}
+
+static inline void trace_qxl_spice_update_area(int qid, uint32_t surface_id, uint32_t left, uint32_t right, uint32_t top, uint32_t bottom)
+{
+}
+
+static inline void trace_qxl_spice_update_area_rest(int qid, uint32_t num_dirty_rects, uint32_t clear_dirty_region)
+{
+}
+
+static inline void trace_qxl_surfaces_dirty(int qid, int surface, int offset, int size)
+{
+}
+
+static inline void trace_qxl_send_events(int qid, uint32_t events)
+{
+}
+
+static inline void trace_qxl_send_events_vm_stopped(int qid, uint32_t events)
+{
+}
+
+static inline void trace_qxl_set_guest_bug(int qid)
+{
+}
+
+static inline void trace_qxl_interrupt_client_monitors_config(int qid, int num_heads, void * heads)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_unsupported_by_guest(int qid, uint32_t int_mask, void * client_monitors_config)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_unsupported_by_device(int qid, int revision)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_capped(int qid, int requested, int limit)
+{
+}
+
+static inline void trace_qxl_client_monitors_config_crc(int qid, unsigned size, uint32_t crc32)
+{
+}
+
+static inline void trace_qxl_set_client_capabilities_unsupported_by_revision(int qid, int revision)
+{
+}
+
+static inline void trace_qxl_render_blit_guest_primary_initialized(void)
+{
+}
+
+static inline void trace_qxl_render_blit(int32_t stride, int32_t left, int32_t right, int32_t top, int32_t bottom)
+{
+}
+
+static inline void trace_qxl_render_guest_primary_resized(int32_t width, int32_t height, int32_t stride, int32_t bytes_pp, int32_t bits_pp)
+{
+}
+
+static inline void trace_qxl_render_update_area_done(void * cookie)
+{
+}
+
+static inline void trace_spapr_pci_msi(const char * msg, uint32_t n, uint32_t ca)
+{
+}
+
+static inline void trace_spapr_pci_msi_setup(const char * name, unsigned vector, uint64_t addr)
+{
+}
+
+static inline void trace_spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req)
+{
+}
+
+static inline void trace_spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr)
+{
+}
+
+static inline void trace_spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq)
+{
+}
+
+static inline void trace_spapr_pci_lsi_set(const char * busname, int pin, uint32_t irq)
+{
+}
+
+static inline void trace_xics_icp_check_ipi(int server, uint8_t mfrr)
+{
+}
+
+static inline void trace_xics_icp_accept(uint32_t old_xirr, uint32_t new_xirr)
+{
+}
+
+static inline void trace_xics_icp_eoi(int server, uint32_t xirr, uint32_t new_xirr)
+{
+}
+
+static inline void trace_xics_icp_irq(int server, int nr, uint8_t priority)
+{
+}
+
+static inline void trace_xics_icp_raise(uint32_t xirr, uint8_t pending_priority)
+{
+}
+
+static inline void trace_xics_set_irq_msi(int srcno, int nr)
+{
+}
+
+static inline void trace_xics_masked_pending(void)
+{
+}
+
+static inline void trace_xics_set_irq_lsi(int srcno, int nr)
+{
+}
+
+static inline void trace_xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority)
+{
+}
+
+static inline void trace_xics_ics_reject(int nr, int srcno)
+{
+}
+
+static inline void trace_xics_ics_eoi(int nr)
+{
+}
+
+static inline void trace_hbitmap_iter_skip_words(const void * hb, void * hbi, uint64_t pos, unsigned long cur)
+{
+}
+
+static inline void trace_hbitmap_reset(void * hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit)
+{
+}
+
+static inline void trace_hbitmap_set(void * hb, uint64_t start, uint64_t count, uint64_t sbit, uint64_t ebit)
+{
+}
+
+static inline void trace_ioinst(const char * insn)
+{
+}
+
+static inline void trace_ioinst_sch_id(const char * insn, int cssid, int ssid, int schid)
+{
+}
+
+static inline void trace_ioinst_chp_id(const char * insn, int cssid, int chpid)
+{
+}
+
+static inline void trace_ioinst_chsc_cmd(uint16_t cmd, uint16_t len)
+{
+}
+
+static inline void trace_css_enable_facility(const char * facility)
+{
+}
+
+static inline void trace_css_crw(uint8_t rsc, uint8_t erc, uint16_t rsid, const char * chained)
+{
+}
+
+static inline void trace_css_chpid_add(uint8_t cssid, uint8_t chpid, uint8_t type)
+{
+}
+
+static inline void trace_css_new_image(uint8_t cssid, const char * default_cssid)
+{
+}
+
+static inline void trace_css_assign_subch(const char * do_assign, uint8_t cssid, uint8_t ssid, uint16_t schid, uint16_t devno)
+{
+}
+
+static inline void trace_css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, const char * conditional)
+{
+}
+
+static inline void trace_virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code)
+{
+}
+
+static inline void trace_virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char * devno_mode)
+{
+}
+
+static inline void trace_migrate_set_state(int new_state)
+{
+}
+
+static inline void trace_kvm_ioctl(int type, void * arg)
+{
+}
+
+static inline void trace_kvm_vm_ioctl(int type, void * arg)
+{
+}
+
+static inline void trace_kvm_vcpu_ioctl(int cpu_index, int type, void * arg)
+{
+}
+
+static inline void trace_kvm_run_exit(int cpu_index, uint32_t reason)
+{
+}
+
+static inline void trace_object_dynamic_cast_assert(const char * type, const char * target, const char * file, int line, const char * func)
+{
+}
+
+static inline void trace_object_class_dynamic_cast_assert(const char * type, const char * target, const char * file, int line, const char * func)
+{
+}
+#endif /* TRACE__GENERATED_TRACERS_H */
diff --git a/contrib/qemu/util/aes.c b/contrib/qemu/util/aes.c
new file mode 100644
index 000000000..91e97fa6e
--- /dev/null
+++ b/contrib/qemu/util/aes.c
@@ -0,0 +1,1314 @@
+/**
+ *
+ * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project.
+ */
+/*
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "qemu-common.h"
+#include "qemu/aes.h"
+
+#ifndef NDEBUG
+#define NDEBUG
+#endif
+
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
+
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+const uint32_t AES_Te0[256] = {
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+const uint32_t AES_Te1[256] = {
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+const uint32_t AES_Te2[256] = {
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+const uint32_t AES_Te3[256] = {
+
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+const uint32_t AES_Te4[256] = {
+ 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+ 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+ 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+ 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+ 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+ 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+ 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+ 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+ 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+ 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+ 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+ 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+ 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+ 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+ 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+ 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+ 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+ 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+ 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+ 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+ 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+ 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+ 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+ 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+ 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+ 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+ 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+ 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+ 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+ 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+ 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+ 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+ 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+ 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+ 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+ 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+ 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+ 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+ 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+ 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+ 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+ 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+ 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+ 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+ 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+ 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+ 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+ 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+ 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+ 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+ 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+ 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+ 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+ 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+ 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+ 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+ 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+ 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+ 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+ 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+ 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+ 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+ 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+ 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
+};
+const uint32_t AES_Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+const uint32_t AES_Td1[256] = {
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+/*
+ * AES_Td2: 256-entry table of 32-bit words used on the decryption side.
+ * In this file it is indexed with the second-lowest byte of a word,
+ * (x >> 8) & 0xff, by AES_decrypt and by the inverse-MixColumn pass of
+ * AES_set_decrypt_key. The entries that can be compared here match the
+ * AES_Td1 entries rotated right by 8 bits (e.g. 0xcd65daf4 -> 0xf4cd65da).
+ */
+const uint32_t AES_Td2[256] = {
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,

+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+/*
+ * AES_Td3: 256-entry table of 32-bit words used on the decryption side.
+ * In this file it is indexed with the lowest byte of a word, x & 0xff,
+ * by AES_decrypt and by the inverse-MixColumn pass of AES_set_decrypt_key.
+ * Each entry equals the AES_Td2 entry at the same index rotated right by
+ * 8 bits (e.g. 0xa75051f4 -> 0xf4a75051).
+ */
+const uint32_t AES_Td3[256] = {
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+/*
+ * AES_Td4: 256-entry table in which every 32-bit word repeats a single
+ * byte value in all four byte positions (0x52525252, 0x09090909, ...).
+ * The final round of AES_decrypt indexes it with one state byte and masks
+ * the result with 0xff000000 / 0x00ff0000 / 0x0000ff00 / 0x000000ff to
+ * drop the substituted byte into the wanted position without shifting.
+ * NOTE(review): the byte values look like the AES inverse S-box; confirm
+ * against FIPS-197 if this table is ever regenerated.
+ */
+const uint32_t AES_Td4[256] = {
+ 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+ 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+ 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+ 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+ 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+ 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+ 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+ 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+ 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+ 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+ 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+ 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+ 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+ 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+ 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+ 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+ 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+ 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+ 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+ 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+ 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+ 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+ 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+ 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+ 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+ 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+ 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+ 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+ 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+ 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+ 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+ 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+ 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+ 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+ 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+ 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+ 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+ 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+ 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+ 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+ 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+ 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+ 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+ 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+ 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+ 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+ 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+ 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+ 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+ 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+ 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+ 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+ 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+ 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+ 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+ 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+ 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+ 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+ 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+ 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+ 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+ 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+ 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+ 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
+};
+/*
+ * Round constants for the key schedule. AES_set_encrypt_key XORs rcon[i]
+ * into the first word produced by each expansion step; the constant
+ * occupies the most significant byte of the word. The 128-bit path reads
+ * indices 0..9, the 192-bit path 0..7 and the 256-bit path 0..6.
+ */
+static const u32 rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * userKey: raw key material; 16, 24 or 32 bytes are read from it as
+ *          32-bit words through GETU32, depending on bits.
+ * bits:    key size in bits; only 128, 192 and 256 are accepted.
+ * key:     output; key->rounds is set to 10, 12 or 14 and key->rd_key
+ *          receives 44, 52 or 60 schedule words respectively.
+ *
+ * Returns 0 on success, -1 when userKey or key is NULL, and -2 when bits
+ * is not one of the supported sizes.
+ */
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key) {
+
+ u32 *rk;
+ int i = 0;
+ u32 temp;
+
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+
+ rk = key->rd_key;
+
+ if (bits==128)
+ key->rounds = 10;
+ else if (bits==192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+
+ /* the first four schedule words are the first 16 key bytes verbatim */
+ rk[0] = GETU32(userKey );
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ /* 10 steps; each derives the next 4 words from the previous 4 */
+ while (1) {
+ temp = rk[3];
+ /*
+ * The index/mask pairing moves byte 2 of temp to the top lane, byte 1
+ * and byte 0 up one lane each, and byte 3 to the bottom lane: a one-byte
+ * left rotation, with every byte passed through AES_Te4 on the way.
+ */
+ rk[4] = rk[0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) {
+ return 0;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ /* 8 steps over a 6-word window; the last step stops after 4 words */
+ while (1) {
+ temp = rk[ 5];
+ rk[ 6] = rk[ 0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 7] = rk[ 1] ^ rk[ 6];
+ rk[ 8] = rk[ 2] ^ rk[ 7];
+ rk[ 9] = rk[ 3] ^ rk[ 8];
+ if (++i == 8) {
+ return 0;
+ }
+ rk[10] = rk[ 4] ^ rk[ 9];
+ rk[11] = rk[ 5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ /* 7 steps over an 8-word window; the last step stops after 4 words */
+ while (1) {
+ temp = rk[ 7];
+ rk[ 8] = rk[ 0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 9] = rk[ 1] ^ rk[ 8];
+ rk[10] = rk[ 2] ^ rk[ 9];
+ rk[11] = rk[ 3] ^ rk[10];
+ if (++i == 7) {
+ return 0;
+ }
+ temp = rk[11];
+ /* second half of the window: substitution only, no rotation, no rcon */
+ rk[12] = rk[ 4] ^
+ (AES_Te4[(temp >> 24) ] & 0xff000000) ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp ) & 0xff] & 0x000000ff);
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+ /* not reached: bits was validated above and every loop returns from inside */
+ return 0;
+}
+
+/**
+ * Build the decryption key schedule for a cipher key.
+ *
+ * The encryption schedule is generated first, then the round keys are
+ * put in reverse order and every round key except the first and the last
+ * is run through the inverse MixColumn transform.
+ *
+ * Returns 0 on success, or the negative status reported by
+ * AES_set_encrypt_key when the arguments are rejected.
+ */
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+                        AES_KEY *key) {
+
+    u32 *rk;
+    u32 swap, word;
+    int lo, hi, w, round, status;
+
+    /* begin from the encryption schedule */
+    status = AES_set_encrypt_key(userKey, bits, key);
+    if (status < 0)
+        return status;
+
+    rk = key->rd_key;
+
+    /* reverse the round keys, exchanging one 4-word group per step */
+    for (lo = 0, hi = 4 * (key->rounds); lo < hi; lo += 4, hi -= 4) {
+        for (w = 0; w < 4; w++) {
+            swap = rk[lo + w];
+            rk[lo + w] = rk[hi + w];
+            rk[hi + w] = swap;
+        }
+    }
+
+    /* inverse MixColumn on every round key but the first and the last */
+    for (round = 1; round < (key->rounds); round++) {
+        rk += 4;
+        for (w = 0; w < 4; w++) {
+            word = rk[w];
+            rk[w] =
+                AES_Td0[AES_Te4[(word >> 24)       ] & 0xff] ^
+                AES_Td1[AES_Te4[(word >> 16) & 0xff] & 0xff] ^
+                AES_Td2[AES_Te4[(word >>  8) & 0xff] & 0xff] ^
+                AES_Td3[AES_Te4[(word      ) & 0xff] & 0xff];
+        }
+    }
+    return 0;
+}
+
+#ifndef AES_ASM
+/*
+ * Encrypt a single block
+ * in and out can overlap
+ *
+ * in:  16 input bytes, read as four 32-bit words through GETU32.
+ * out: receives 16 output bytes, written through PUTU32.
+ * key: schedule to use; key->rd_key and key->rounds are read, so it is
+ *      expected to have been filled in by AES_set_encrypt_key.
+ *
+ * All three pointers are checked with assert() only. The round function
+ * is table driven (AES_Te0..AES_Te3 for full rounds, AES_Te4 for the last
+ * round) and exists in two forms selected by FULL_UNROLL: straight-line
+ * code for every round, or a loop that performs two rounds per iteration.
+ */
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key);
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* the state alternates between s0..s3 and t0..t3 from round to round */
+ /* round 1: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[10];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[12];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[13];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[14];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[16];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[17];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[18];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[20];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[21];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[22];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[24];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[25];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[26];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[28];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[29];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[30];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[32];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[33];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[34];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[36];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[37];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[38];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[39];
+ /* schedules with more than 10 rounds need two or four extra full rounds */
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[40];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[41];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[42];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[44];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[45];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[46];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[48];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[49];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[50];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[52];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[53];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[54];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ /* advance to the last round key (word index 4 * rounds) */
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ /* two rounds per pass: s -> t, then t -> s, consuming 8 key words */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ AES_Te0[(s0 >> 24) ] ^
+ AES_Te1[(s1 >> 16) & 0xff] ^
+ AES_Te2[(s2 >> 8) & 0xff] ^
+ AES_Te3[(s3 ) & 0xff] ^
+ rk[4];
+ t1 =
+ AES_Te0[(s1 >> 24) ] ^
+ AES_Te1[(s2 >> 16) & 0xff] ^
+ AES_Te2[(s3 >> 8) & 0xff] ^
+ AES_Te3[(s0 ) & 0xff] ^
+ rk[5];
+ t2 =
+ AES_Te0[(s2 >> 24) ] ^
+ AES_Te1[(s3 >> 16) & 0xff] ^
+ AES_Te2[(s0 >> 8) & 0xff] ^
+ AES_Te3[(s1 ) & 0xff] ^
+ rk[6];
+ t3 =
+ AES_Te0[(s3 >> 24) ] ^
+ AES_Te1[(s0 >> 16) & 0xff] ^
+ AES_Te2[(s1 >> 8) & 0xff] ^
+ AES_Te3[(s2 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ /* leave after the odd round: t0..t3 hold the state, rk is at the last round key */
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ AES_Te0[(t0 >> 24) ] ^
+ AES_Te1[(t1 >> 16) & 0xff] ^
+ AES_Te2[(t2 >> 8) & 0xff] ^
+ AES_Te3[(t3 ) & 0xff] ^
+ rk[0];
+ s1 =
+ AES_Te0[(t1 >> 24) ] ^
+ AES_Te1[(t2 >> 16) & 0xff] ^
+ AES_Te2[(t3 >> 8) & 0xff] ^
+ AES_Te3[(t0 ) & 0xff] ^
+ rk[1];
+ s2 =
+ AES_Te0[(t2 >> 24) ] ^
+ AES_Te1[(t3 >> 16) & 0xff] ^
+ AES_Te2[(t0 >> 8) & 0xff] ^
+ AES_Te3[(t1 ) & 0xff] ^
+ rk[2];
+ s3 =
+ AES_Te0[(t3 >> 24) ] ^
+ AES_Te1[(t0 >> 16) & 0xff] ^
+ AES_Te2[(t1 >> 8) & 0xff] ^
+ AES_Te3[(t2 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ /* the last round uses AES_Te4 with one byte lane selected per lookup */
+ s0 =
+ (AES_Te4[(t0 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (AES_Te4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (AES_Te4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (AES_Te4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+/*
+ * Decrypt a single block
+ * in and out can overlap
+ *
+ * in:  16 input bytes, read as four 32-bit words through GETU32.
+ * out: receives 16 output bytes, written through PUTU32.
+ * key: schedule to use; key->rd_key and key->rounds are read, so it is
+ *      expected to have been filled in by AES_set_decrypt_key.
+ *
+ * All three pointers are checked with assert() only. The structure
+ * mirrors AES_encrypt, but uses the AES_Td0..AES_Td3 tables for full
+ * rounds and AES_Td4 for the last round, and picks the state words in
+ * the opposite rotation order (s0, s3, s2, s1 instead of s0, s1, s2, s3).
+ */
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key);
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* the state alternates between s0..s3 and t0..t3 from round to round */
+ /* round 1: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[10];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[12];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[13];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[14];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[16];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[17];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[18];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[20];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[21];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[22];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[24];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[25];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[26];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[28];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[29];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[30];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[32];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[33];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[34];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[36];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[37];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[38];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[39];
+ /* schedules with more than 10 rounds need two or four extra full rounds */
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[40];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[41];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[42];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[44];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[45];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[46];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[48];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[49];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[50];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[52];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[53];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[54];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ /* advance to the last round key (word index 4 * rounds) */
+ rk += key->rounds << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ /* two rounds per pass: s -> t, then t -> s, consuming 8 key words */
+ r = key->rounds >> 1;
+ for (;;) {
+ t0 =
+ AES_Td0[(s0 >> 24) ] ^
+ AES_Td1[(s3 >> 16) & 0xff] ^
+ AES_Td2[(s2 >> 8) & 0xff] ^
+ AES_Td3[(s1 ) & 0xff] ^
+ rk[4];
+ t1 =
+ AES_Td0[(s1 >> 24) ] ^
+ AES_Td1[(s0 >> 16) & 0xff] ^
+ AES_Td2[(s3 >> 8) & 0xff] ^
+ AES_Td3[(s2 ) & 0xff] ^
+ rk[5];
+ t2 =
+ AES_Td0[(s2 >> 24) ] ^
+ AES_Td1[(s1 >> 16) & 0xff] ^
+ AES_Td2[(s0 >> 8) & 0xff] ^
+ AES_Td3[(s3 ) & 0xff] ^
+ rk[6];
+ t3 =
+ AES_Td0[(s3 >> 24) ] ^
+ AES_Td1[(s2 >> 16) & 0xff] ^
+ AES_Td2[(s1 >> 8) & 0xff] ^
+ AES_Td3[(s0 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ /* leave after the odd round: t0..t3 hold the state, rk is at the last round key */
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ AES_Td0[(t0 >> 24) ] ^
+ AES_Td1[(t3 >> 16) & 0xff] ^
+ AES_Td2[(t2 >> 8) & 0xff] ^
+ AES_Td3[(t1 ) & 0xff] ^
+ rk[0];
+ s1 =
+ AES_Td0[(t1 >> 24) ] ^
+ AES_Td1[(t0 >> 16) & 0xff] ^
+ AES_Td2[(t3 >> 8) & 0xff] ^
+ AES_Td3[(t2 ) & 0xff] ^
+ rk[1];
+ s2 =
+ AES_Td0[(t2 >> 24) ] ^
+ AES_Td1[(t1 >> 16) & 0xff] ^
+ AES_Td2[(t0 >> 8) & 0xff] ^
+ AES_Td3[(t3 ) & 0xff] ^
+ rk[2];
+ s3 =
+ AES_Td0[(t3 >> 24) ] ^
+ AES_Td1[(t2 >> 16) & 0xff] ^
+ AES_Td2[(t1 >> 8) & 0xff] ^
+ AES_Td3[(t0 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ /* the last round uses AES_Td4 with one byte lane selected per lookup */
+ s0 =
+ (AES_Td4[(t0 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (AES_Td4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (AES_Td4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (AES_Td4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+#endif /* AES_ASM */
+
+/*
+ * CBC-mode encryption or decryption of a buffer with AES.
+ *
+ * in:     input bytes (plaintext when enc is non-zero, ciphertext otherwise).
+ * out:    output buffer; may be the same buffer as in.
+ * length: number of payload bytes to process.
+ * key:    schedule from AES_set_encrypt_key (enc != 0) or
+ *         AES_set_decrypt_key (enc == 0).
+ * ivec:   AES_BLOCK_SIZE-byte chaining value; read as the IV and updated
+ *         in place to the last ciphertext block handled, so consecutive
+ *         calls continue the same CBC stream.
+ * enc:    non-zero to encrypt, zero to decrypt.
+ *
+ * A trailing partial block is handled in a non-standard way: on
+ * encryption the missing plaintext bytes are treated as zero and a full
+ * AES_BLOCK_SIZE-byte block is written to out; on decryption a full
+ * block is read from in and only the remaining `length % AES_BLOCK_SIZE`
+ * bytes are written to out.
+ *
+ * All pointers are checked with assert() only.
+ */
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                     const unsigned long length, const AES_KEY *key,
+                     unsigned char *ivec, const int enc)
+{
+
+    unsigned long n;
+    unsigned long len = length;
+    unsigned char tmp[AES_BLOCK_SIZE];
+
+    assert(in && out && key && ivec);
+
+    if (enc) {
+        while (len >= AES_BLOCK_SIZE) {
+            for(n=0; n < AES_BLOCK_SIZE; ++n)
+                tmp[n] = in[n] ^ ivec[n];
+            AES_encrypt(tmp, out, key);
+            /* the ciphertext just produced chains into the next block */
+            memcpy(ivec, out, AES_BLOCK_SIZE);
+            len -= AES_BLOCK_SIZE;
+            in += AES_BLOCK_SIZE;
+            out += AES_BLOCK_SIZE;
+        }
+        if (len) {
+            /* partial block: absent plaintext bytes count as zero */
+            for(n=0; n < len; ++n)
+                tmp[n] = in[n] ^ ivec[n];
+            for(n=len; n < AES_BLOCK_SIZE; ++n)
+                tmp[n] = ivec[n];
+            AES_encrypt(tmp, tmp, key);
+            memcpy(out, tmp, AES_BLOCK_SIZE);
+            memcpy(ivec, tmp, AES_BLOCK_SIZE);
+        }
+    } else {
+        while (len >= AES_BLOCK_SIZE) {
+            /* keep the ciphertext: in and out may be the same buffer */
+            memcpy(tmp, in, AES_BLOCK_SIZE);
+            AES_decrypt(in, out, key);
+            for(n=0; n < AES_BLOCK_SIZE; ++n)
+                out[n] ^= ivec[n];
+            memcpy(ivec, tmp, AES_BLOCK_SIZE);
+            len -= AES_BLOCK_SIZE;
+            in += AES_BLOCK_SIZE;
+            out += AES_BLOCK_SIZE;
+        }
+        if (len) {
+            unsigned char cipher[AES_BLOCK_SIZE];
+
+            /*
+             * Keep the ciphertext block separate from the decrypted block so
+             * that ivec ends up holding ciphertext, as on every other path.
+             * Decrypting in place and then copying tmp into ivec would store
+             * the decrypted block as the next chaining value.
+             */
+            memcpy(cipher, in, AES_BLOCK_SIZE);
+            AES_decrypt(cipher, tmp, key);
+            for(n=0; n < len; ++n)
+                out[n] = tmp[n] ^ ivec[n];
+            memcpy(ivec, cipher, AES_BLOCK_SIZE);
+        }
+    }
+}
diff --git a/contrib/qemu/util/bitmap.c b/contrib/qemu/util/bitmap.c
new file mode 100644
index 000000000..687841dce
--- /dev/null
+++ b/contrib/qemu/util/bitmap.c
@@ -0,0 +1,256 @@
+/*
+ * Bitmap Module
+ *
+ * Stolen from linux/src/lib/bitmap.c
+ *
+ * Copyright (C) 2010 Corentin Chary
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.
+ */
+
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs. The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'. The implementation makes
+ * no particular effort to keep them zero. It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures.
+ */
+
+/*
+ * Return 1 if none of the first @bits bits of @bitmap is set, 0 otherwise.
+ */
+int slow_bitmap_empty(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ /* Completely used words: any non-zero word means some bit is set. */
+ for (k = 0; k < lim; ++k) {
+ if (bitmap[k]) {
+ return 0;
+ }
+ }
+ /* Partially used last word: only the bits kept by the mask count. */
+ if (bits % BITS_PER_LONG) {
+ if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Return 1 if all of the first @bits bits of @bitmap are set, 0 otherwise.
+ */
+int slow_bitmap_full(const unsigned long *bitmap, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ /* Completely used words: a non-zero complement means a clear bit. */
+ for (k = 0; k < lim; ++k) {
+ if (~bitmap[k]) {
+ return 0;
+ }
+ }
+
+ /* Partially used last word: look for clear bits under the mask only. */
+ if (bits % BITS_PER_LONG) {
+ if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Return 1 if the first @bits bits of @bitmap1 and @bitmap2 are identical,
+ * 0 otherwise.
+ */
+int slow_bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ /* Completely used words are compared directly. */
+ for (k = 0; k < lim; ++k) {
+ if (bitmap1[k] != bitmap2[k]) {
+ return 0;
+ }
+ }
+
+ /* Partially used last word: differences outside the mask are ignored. */
+ if (bits % BITS_PER_LONG) {
+ if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Store the bitwise complement of the first @bits bits of @src into @dst.
+ */
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+ int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ dst[k] = ~src[k];
+ }
+
+ /* Partially used last word: the result is masked before being stored. */
+ if (bits % BITS_PER_LONG) {
+ dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+ }
+}
+
+/*
+ * dst = bitmap1 & bitmap2, computed over BITS_TO_LONGS(@bits) whole words
+ * (the last word is not masked).
+ *
+ * Returns 1 if any word of the result is non-zero, 0 otherwise.
+ */
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++) {
+ /* Accumulate every stored word so the final test sees them all. */
+ result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ }
+ return result != 0;
+}
+
+/*
+ * dst = bitmap1 | bitmap2, computed over BITS_TO_LONGS(@bits) whole words.
+ */
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++) {
+ dst[k] = bitmap1[k] | bitmap2[k];
+ }
+}
+
+/*
+ * dst = bitmap1 ^ bitmap2, computed over BITS_TO_LONGS(@bits) whole words.
+ */
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++) {
+ dst[k] = bitmap1[k] ^ bitmap2[k];
+ }
+}
+
+/*
+ * dst = bitmap1 & ~bitmap2, computed over BITS_TO_LONGS(@bits) whole words
+ * (the last word is not masked).
+ *
+ * Returns 1 if any word of the result is non-zero, 0 otherwise.
+ */
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++) {
+ /* Accumulate every stored word so the final test sees them all. */
+ result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+ }
+ return result != 0;
+}
+
+/* Mask with every bit from position (start % BITS_PER_LONG) upwards set. */
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+/*
+ * Set @nr consecutive bits in @map, beginning at bit number @start.
+ */
+void bitmap_set(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ /* Number of bits from start up to the end of the word containing it. */
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ /* While the range reaches the end of the current word, fill that word. */
+ while (nr - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ nr -= bits_to_set;
+ /* Every following word starts at bit 0, so it is covered entirely. */
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ /* Remainder ends inside a word: trim the mask at the range end. */
+ if (nr) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+/*
+ * Clear @nr consecutive bits in @map, beginning at bit number @start.
+ * Mirrors bitmap_set(), with the word masks applied inverted.
+ */
+void bitmap_clear(unsigned long *map, int start, int nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const int size = start + nr;
+ /* Number of bits from start up to the end of the word containing it. */
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ /* While the range reaches the end of the current word, clear that part. */
+ while (nr - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ nr -= bits_to_clear;
+ /* Every following word starts at bit 0, so it is covered entirely. */
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ /* Remainder ends inside a word: trim the mask at the range end. */
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+/* Round x up to the next value whose bits selected by mask are all zero. */
+#define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ *
+ * Returns the bit index at which the zero area starts. If no candidate area
+ * fits below @size, the value returned is greater than @size.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask)
+{
+ unsigned long index, end, i;
+again:
+ index = find_next_zero_bit(map, size, start);
+
+ /* Align allocation */
+ index = ALIGN_MASK(index, align_mask);
+
+ end = index + nr;
+ if (end > size) {
+ /* Candidate runs past the bitmap: report failure as a value > size. */
+ return end;
+ }
+ /* Look for a set bit inside the candidate area [index, end). */
+ i = find_next_bit(map, end, index);
+ if (i < end) {
+ /* Area is interrupted at bit i; retry just after it. */
+ start = i + 1;
+ goto again;
+ }
+ return index;
+}
+
+/*
+ * Return 1 if @bitmap1 and @bitmap2 have at least one common set bit among
+ * their first @bits bits, 0 otherwise.
+ */
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (bitmap1[k] & bitmap2[k]) {
+ return 1;
+ }
+ }
+
+ /* Partially used last word: common bits outside the mask are ignored. */
+ if (bits % BITS_PER_LONG) {
+ if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/contrib/qemu/util/bitops.c b/contrib/qemu/util/bitops.c
new file mode 100644
index 000000000..227c38b88
--- /dev/null
+++ b/contrib/qemu/util/bitops.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/bitops.h"
+
+/* Index of the unsigned long that holds bit number nr. */
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/*
+ * Find the next set bit in a memory region.
+ *
+ * @addr: start of the bitmap
+ * @size: number of bits in the bitmap
+ * @offset: bit number at which the search starts
+ *
+ * Returns the bit number found by the search, or @size when @offset is
+ * already at or beyond @size or no set bit is found before @size.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ /* Bit number of the first bit of the word that contains offset. */
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size) {
+ return size;
+ }
+ /* From here on size counts the bits remaining from result onwards. */
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ /* Search starts mid-word: drop the bits below offset. */
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG) {
+ goto found_first;
+ }
+ if (tmp) {
+ goto found_middle;
+ }
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ /* Skip four all-zero words per iteration while that many remain. */
+ while (size >= 4*BITS_PER_LONG) {
+ unsigned long d1, d2, d3;
+ tmp = *p;
+ d1 = *(p+1);
+ d2 = *(p+2);
+ d3 = *(p+3);
+ if (tmp) {
+ goto found_middle;
+ }
+ if (d1 | d2 | d3) {
+ /* A bit is set in one of the next three words; the loop below finds it. */
+ break;
+ }
+ p += 4;
+ result += 4*BITS_PER_LONG;
+ size -= 4*BITS_PER_LONG;
+ }
+ while (size >= BITS_PER_LONG) {
+ if ((tmp = *(p++))) {
+ goto found_middle;
+ }
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size) {
+ return result;
+ }
+ tmp = *p;
+
+found_first:
+ /* Last, partially used word: ignore bits at or beyond the end. */
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) { /* Are any bits set? */
+ return result + size; /* Nope. */
+ }
+found_middle:
+ /* presumably ctzl() gives the position of the lowest set bit -- confirm */
+ return result + ctzl(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ *
+ * @addr: start of the bitmap
+ * @size: number of bits in the bitmap
+ * @offset: bit number at which the search starts
+ *
+ * Returns the bit number found by the search, or @size when @offset is
+ * already at or beyond @size or no clear bit is found before @size.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ /* Bit number of the first bit of the word that contains offset. */
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size) {
+ return size;
+ }
+ /* From here on size counts the bits remaining from result onwards. */
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ /* Search starts mid-word: force the bits below offset to 1. */
+ tmp = *(p++);
+ tmp |= ~0UL >> (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG) {
+ goto found_first;
+ }
+ if (~tmp) {
+ goto found_middle;
+ }
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ /* Walk the remaining whole words. */
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++))) {
+ goto found_middle;
+ }
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size) {
+ return result;
+ }
+ tmp = *p;
+
+found_first:
+ /* Last, partially used word: force bits at or beyond the end to 1. */
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) { /* Are any bits zero? */
+ return result + size; /* Nope. */
+ }
+found_middle:
+ /* presumably ctzl() gives the position of the lowest set bit -- confirm */
+ return result + ctzl(~tmp);
+}
+
+/*
+ * Search the first @size bits of @addr backwards, from the highest word down
+ * to word 0, for a non-zero word.
+ *
+ * Returns a bit number inside the first non-zero word met (computed with
+ * clzl(); presumably the highest set bit -- confirm), or @size when every
+ * examined word is zero.
+ */
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long words;
+ unsigned long tmp;
+
+ /* Start at final word. */
+ words = size / BITS_PER_LONG;
+
+ /* Partial final word? */
+ if (size & (BITS_PER_LONG-1)) {
+ /* Only the low (size % BITS_PER_LONG) bits of that word are examined. */
+ tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+ - (size & (BITS_PER_LONG-1)))));
+ if (tmp) {
+ goto found;
+ }
+ }
+
+ while (words) {
+ tmp = addr[--words];
+ if (tmp) {
+ found:
+ return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp);
+ }
+ }
+
+ /* Not found */
+ return size;
+}
diff --git a/contrib/qemu/util/cutils.c b/contrib/qemu/util/cutils.c
new file mode 100644
index 000000000..0116fcde7
--- /dev/null
+++ b/contrib/qemu/util/cutils.c
@@ -0,0 +1,532 @@
+/*
+ * Simple C functions to supplement the C library
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+#include <math.h>
+
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+
+/*
+ * Fill the @buf_size bytes of @buf with the characters of @str (at most
+ * @buf_size of them, terminating NUL excluded) followed by @pad bytes.
+ * No terminating NUL is appended.
+ */
+void strpadcpy(char *buf, int buf_size, const char *str, char pad)
+{
+ int len = qemu_strnlen(str, buf_size);
+ memcpy(buf, str, len);
+ memset(buf + len, pad, buf_size - len);
+}
+
+/*
+ * Copy @str into @buf, truncating to at most @buf_size - 1 characters, and
+ * always terminate the result with NUL.  Does nothing when @buf_size <= 0.
+ */
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+ int c;
+ char *q = buf;
+
+ if (buf_size <= 0)
+ return;
+
+ for(;;) {
+ c = *str++;
+ /* Stop at the end of str or when only room for the NUL is left. */
+ if (c == 0 || q >= buf + buf_size - 1)
+ break;
+ *q++ = c;
+ }
+ *q = '\0';
+}
+
+/*
+ * strcat and truncate.
+ *
+ * Append @s to the NUL-terminated string in @buf, whose total capacity is
+ * @buf_size bytes, using pstrcpy() for the truncating copy.  Nothing is
+ * appended when the current length is already >= @buf_size.  Returns @buf.
+ */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+ int len;
+ len = strlen(buf);
+ if (len < buf_size)
+ pstrcpy(buf + len, buf_size - len, s);
+ return buf;
+}
+
+/*
+ * Test whether @str begins with the prefix @val.
+ *
+ * Returns 1 on a match and, when @ptr is non-NULL, stores in *@ptr the
+ * position in @str just after the prefix.  Returns 0 otherwise, leaving
+ * *@ptr untouched.
+ */
+int strstart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ /* Also ends the scan when str is shorter than val: NUL != *q. */
+ if (*p != *q)
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+/*
+ * Same as strstart(), except that characters are compared after being passed
+ * through qemu_toupper().
+ *
+ * Returns 1 on a match and, when @ptr is non-NULL, stores in *@ptr the
+ * position in @str just after the prefix.  Returns 0 otherwise, leaving
+ * *@ptr untouched.
+ */
+int stristart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (qemu_toupper(*p) != qemu_toupper(*q))
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+/*
+ * Return the length of @s without looking at more than @max_len characters;
+ * the result is @max_len when no NUL is found within that range.
+ *
+ * XXX: use host strnlen if available ?
+ */
+int qemu_strnlen(const char *s, int max_len)
+{
+ int i;
+
+ for(i = 0; i < max_len; i++) {
+ if (s[i] == '\0') {
+ break;
+ }
+ }
+ return i;
+}
+
+/*
+ * Split off the next token of the string *@input.
+ *
+ * The token ends at the first character that occurs in @delim; that
+ * character is overwritten with NUL and *@input is advanced just past it.
+ * When no delimiter is found, *@input is set to NULL.
+ *
+ * Returns the start of the token (the previous value of *@input), which is
+ * NULL once the input has been exhausted.
+ */
+char *qemu_strsep(char **input, const char *delim)
+{
+ char *result = *input;
+ if (result != NULL) {
+ char *p;
+
+ for (p = result; *p != '\0'; p++) {
+ if (strchr(delim, *p)) {
+ break;
+ }
+ }
+ if (*p == '\0') {
+ /* Reached the end of the string: this was the last token. */
+ *input = NULL;
+ } else {
+ *p = '\0';
+ *input = p + 1;
+ }
+ }
+ return result;
+}
+
+/*
+ * Convert the broken-down time @tm to a time_t using only arithmetic on its
+ * tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec fields; no time zone
+ * or daylight-saving adjustment is applied and @tm is not modified.
+ */
+time_t mktimegm(struct tm *tm)
+{
+ time_t t;
+ int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday;
+ /* Count January and February as months 13 and 14 of the previous year. */
+ if (m < 3) {
+ m += 12;
+ y--;
+ }
+ /*
+ * Day count with the leap-year terms y/4 - y/100 + y/400; the constant
+ * 719469 makes the count 0 for 1 January 1970.
+ */
+ t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 +
+ y / 400 - 719469);
+ t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
+ return t;
+}
+
+/*
+ * Return 32 - clz32(@i).
+ * presumably the 1-based position of the highest set bit of @i, and 0 when
+ * @i is 0 -- confirm against clz32().
+ */
+int qemu_fls(int i)
+{
+ return 32 - clz32(i);
+}
+
+/*
+ * Make sure data goes on disk, but if possible do not bother to
+ * write out the inode just for timestamp updates.
+ *
+ * Unfortunately even in 2009 many operating systems do not support
+ * fdatasync and have to fall back to fsync.
+ *
+ * Returns the result of fdatasync(@fd) when CONFIG_FDATASYNC is defined,
+ * and of fsync(@fd) otherwise.
+ */
+int qemu_fdatasync(int fd)
+{
+#ifdef CONFIG_FDATASYNC
+ return fdatasync(fd);
+#else
+ return fsync(fd);
+#endif
+}
+
+/*
+ * Searches for an area with non-zero content in a buffer
+ *
+ * Attention! The len must be a multiple of
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * and addr must be a multiple of sizeof(VECTYPE) due to
+ * restriction of optimizations in this function.
+ *
+ * can_use_buffer_find_nonzero_offset() can be used to check
+ * these requirements.
+ *
+ * The return value is the offset of the non-zero area rounded
+ * down to a multiple of sizeof(VECTYPE) for the first
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * afterwards.
+ *
+ * If the buffer is all zero the return value is equal to len.
+ */
+
+size_t buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+ const VECTYPE *p = buf;
+ const VECTYPE zero = (VECTYPE){0};
+ size_t i;
+
+ assert(can_use_buffer_find_nonzero_offset(buf, len));
+
+ if (!len) {
+ return 0;
+ }
+
+ /* First group of vectors: checked one at a time for an exact offset. */
+ for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
+ if (!ALL_EQ(p[i], zero)) {
+ return i * sizeof(VECTYPE);
+ }
+ }
+
+ /*
+ * Remaining groups: OR eight vectors together and test once per group.
+ * NOTE(review): the body hard-codes eight vectors per step, so it assumes
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR is 8 -- confirm.
+ */
+ for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
+ i < len / sizeof(VECTYPE);
+ i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
+ VECTYPE tmp0 = p[i + 0] | p[i + 1];
+ VECTYPE tmp1 = p[i + 2] | p[i + 3];
+ VECTYPE tmp2 = p[i + 4] | p[i + 5];
+ VECTYPE tmp3 = p[i + 6] | p[i + 7];
+ VECTYPE tmp01 = tmp0 | tmp1;
+ VECTYPE tmp23 = tmp2 | tmp3;
+ if (!ALL_EQ(tmp01 | tmp23, zero)) {
+ break;
+ }
+ }
+
+ /* Start of the non-zero group, or len if the loop ran to completion. */
+ return i * sizeof(VECTYPE);
+}
+
+/*
+ * Checks if a buffer is all zeroes
+ *
+ * Attention! The len must be a multiple of 4 * sizeof(long) due to
+ * restriction of optimizations in this function.
+ *
+ * Returns true when all @len bytes of @buf are zero, false otherwise.
+ */
+bool buffer_is_zero(const void *buf, size_t len)
+{
+ /*
+ * Use long as the biggest available internal data type that fits into the
+ * CPU register and unroll the loop to smooth out the effect of memory
+ * latency.
+ */
+
+ size_t i;
+ long d0, d1, d2, d3;
+ const long * const data = buf;
+
+ /* use vector optimized zero check if possible */
+ if (can_use_buffer_find_nonzero_offset(buf, len)) {
+ return buffer_find_nonzero_offset(buf, len) == len;
+ }
+
+ assert(len % (4 * sizeof(long)) == 0);
+ /* From here on len counts longs rather than bytes. */
+ len /= sizeof(long);
+
+ for (i = 0; i < len; i += 4) {
+ d0 = data[i + 0];
+ d1 = data[i + 1];
+ d2 = data[i + 2];
+ d3 = data[i + 3];
+
+ if (d0 || d1 || d2 || d3) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#ifndef _WIN32
+/*
+ * Sets a specific flag
+ *
+ * Reads the current flags of @fd with F_GETFL and writes them back with
+ * F_SETFL after OR-ing in @flag.
+ *
+ * Returns 0 on success, or -errno when either fcntl() call fails.
+ */
+int fcntl_setfl(int fd, int flag)
+{
+ int flags;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags == -1)
+ return -errno;
+
+ if (fcntl(fd, F_SETFL, flags | flag) == -1)
+ return -errno;
+
+ return 0;
+}
+#endif
+
+/*
+ * Map a size suffix character to its multiplier.
+ *
+ * @suffix is upper-cased with qemu_toupper() before being compared with the
+ * STRTOSZ_DEFSUFFIX_* constants.  The result is @unit raised to the power
+ * 0 (B), 1 (KB), 2 (MB), 3 (GB), 4 (TB), 5 (PB) or 6 (EB).
+ *
+ * Returns -1 when @suffix is not one of the recognised suffixes.
+ */
+static int64_t suffix_mul(char suffix, int64_t unit)
+{
+ switch (qemu_toupper(suffix)) {
+ case STRTOSZ_DEFSUFFIX_B:
+ return 1;
+ case STRTOSZ_DEFSUFFIX_KB:
+ return unit;
+ case STRTOSZ_DEFSUFFIX_MB:
+ return unit * unit;
+ case STRTOSZ_DEFSUFFIX_GB:
+ return unit * unit * unit;
+ case STRTOSZ_DEFSUFFIX_TB:
+ return unit * unit * unit * unit;
+ case STRTOSZ_DEFSUFFIX_PB:
+ return unit * unit * unit * unit * unit;
+ case STRTOSZ_DEFSUFFIX_EB:
+ return unit * unit * unit * unit * unit * unit;
+ }
+ return -1;
+}
+
+/*
+ * Convert string to bytes. The number may be followed by one of the size
+ * suffixes accepted by suffix_mul() (bytes, KB, MB, GB, TB, PB or EB, in
+ * either case); when no such suffix follows, @default_suffix is used
+ * instead. @unit is the base of the multiplier (for example 1024).
+ *
+ * End pointer will be returned in *end, if not NULL. Return -ERANGE on
+ * overflow or on a negative number, Return -EINVAL on other error.
+ */
+int64_t strtosz_suffix_unit(const char *nptr, char **end,
+ const char default_suffix, int64_t unit)
+{
+ int64_t retval = -EINVAL;
+ char *endptr;
+ unsigned char c;
+ int mul_required = 0;
+ double val, mul, integral, fraction;
+
+ errno = 0;
+ val = strtod(nptr, &endptr);
+ if (isnan(val) || endptr == nptr || errno != 0) {
+ goto fail;
+ }
+ /* A fractional number only makes sense with a multiplier above 1. */
+ fraction = modf(val, &integral);
+ if (fraction != 0) {
+ mul_required = 1;
+ }
+ c = *endptr;
+ mul = suffix_mul(c, unit);
+ if (mul >= 0) {
+ /* A recognised suffix follows the number: consume it. */
+ endptr++;
+ } else {
+ mul = suffix_mul(default_suffix, unit);
+ assert(mul >= 0);
+ }
+ if (mul == 1 && mul_required) {
+ goto fail;
+ }
+ if ((val * mul >= INT64_MAX) || val < 0) {
+ retval = -ERANGE;
+ goto fail;
+ }
+ retval = val * mul;
+
+fail:
+ /* Reached on success as well: *end is reported on every path. */
+ if (end) {
+ *end = endptr;
+ }
+
+ return retval;
+}
+
+/*
+ * Same as strtosz_suffix_unit() with a unit of 1024.
+ */
+int64_t strtosz_suffix(const char *nptr, char **end, const char default_suffix)
+{
+ return strtosz_suffix_unit(nptr, end, default_suffix, 1024);
+}
+
+/*
+ * Same as strtosz_suffix() with STRTOSZ_DEFSUFFIX_MB as the default suffix.
+ */
+int64_t strtosz(const char *nptr, char **end)
+{
+ return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
+}
+
+/**
+ * parse_uint:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @endptr: Destination for pointer to first character not consumed
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer
+ *
+ * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
+ * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits.
+ *
+ * If @s is null, or @base is invalid, or @s doesn't start with an
+ * integer in the syntax above, set *@value to 0, *@endptr to @s, and
+ * return -EINVAL.
+ *
+ * Set *@endptr to point right beyond the parsed integer (even if the integer
+ * overflows or is negative, all digits will be parsed and *@endptr will
+ * point right beyond them).
+ *
+ * If the integer is negative, set *@value to 0, and return -ERANGE.
+ *
+ * If the integer overflows unsigned long long, set *@value to
+ * ULLONG_MAX, and return -ERANGE.
+ *
+ * Else, set *@value to the parsed integer, and return 0.
+ *
+ * Both @value and @endptr are written on every path, so neither may be NULL.
+ */
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+ int base)
+{
+ int r = 0;
+ char *endp = (char *)s;
+ unsigned long long val = 0;
+
+ if (!s) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ errno = 0;
+ val = strtoull(s, &endp, base);
+ if (errno) {
+ /* Whatever strtoull() reported is handed back negated. */
+ r = -errno;
+ goto out;
+ }
+
+ if (endp == s) {
+ /* Nothing was consumed: @s does not start with a number. */
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* make sure we reject negative numbers: */
+ while (isspace((unsigned char)*s)) {
+ s++;
+ }
+ if (*s == '-') {
+ val = 0;
+ r = -ERANGE;
+ goto out;
+ }
+
+out:
+ *value = val;
+ *endptr = endp;
+ return r;
+}
+
+/**
+ * parse_uint_full:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer from entire string
+ *
+ * Have the same behavior of parse_uint(), but with an additional check
+ * for additional data after the parsed number. If extra characters are present
+ * after the parsed number, the function will return -EINVAL, and *@value will
+ * be set to 0.
+ */
+int parse_uint_full(const char *s, unsigned long long *value, int base)
+{
+ char *endp;
+ int r;
+
+ r = parse_uint(s, value, &endp, base);
+ if (r < 0) {
+ return r;
+ }
+ /* Anything left after the number makes the whole string invalid. */
+ if (*endp) {
+ *value = 0;
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Parse @param as a decimal file descriptor number.
+ *
+ * Returns the descriptor on success.  Returns -1 when @param is not a
+ * complete decimal number, when the number is negative, or when it cannot be
+ * represented as an int.  The conversion is done in a long and range-checked
+ * so that oversized input is rejected instead of being silently truncated
+ * to some unrelated descriptor number.
+ */
+int qemu_parse_fd(const char *param)
+{
+    long fd;
+    char *endptr = NULL;
+
+    errno = 0;
+    fd = strtol(param, &endptr, 10);
+    if (param == endptr ||  /* no digits were converted */
+        *endptr != '\0' ||  /* trailing characters after the number */
+        errno != 0 ||       /* value does not fit in a long */
+        fd < 0 ||           /* a descriptor is never negative */
+        fd != (int)fd) {    /* value does not fit in the int result */
+        return -1;
+    }
+    return (int)fd;
+}
+
+/*
+ * Round @value down to the nearest power of 2.
+ *
+ * A value that is_power_of_2() accepts is returned unchanged.  Zero is
+ * returned as zero: it has no set bit, and if clz64(0) yields 64 the shift
+ * below would be by the full width of the operand, which is undefined.
+ */
+int64_t pow2floor(int64_t value)
+{
+    if (value == 0) {
+        return 0;
+    }
+    if (!is_power_of_2(value)) {
+        /* Keep only the highest set bit of value. */
+        value = 0x8000000000000000ULL >> clz64(value);
+    }
+    return value;
+}
+
+/*
+ * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+ * Input is limited to 14-bit numbers
+ *
+ * Encodes @n into @out and returns the number of bytes written (1 or 2).
+ * @n must not exceed 0x3fff (checked with g_assert()).
+ */
+int uleb128_encode_small(uint8_t *out, uint32_t n)
+{
+ g_assert(n <= 0x3fff);
+ if (n < 0x80) {
+ *out++ = n;
+ return 1;
+ } else {
+ /* Low 7 bits first, with bit 7 set to announce a second byte. */
+ *out++ = (n & 0x7f) | 0x80;
+ *out++ = n >> 7;
+ return 2;
+ }
+}
+
+/*
+ * Decode a number of at most 14 bits written by uleb128_encode_small().
+ *
+ * Stores the value in *@n and returns the number of bytes read from @in
+ * (1 or 2).  Returns -1 when the second byte also has bit 7 set; in that
+ * case *@n holds only the low 7 bits taken from the first byte.
+ */
+int uleb128_decode_small(const uint8_t *in, uint32_t *n)
+{
+ if (!(*in & 0x80)) {
+ *n = *in++;
+ return 1;
+ } else {
+ *n = *in++ & 0x7f;
+ /* we exceed 14 bit number */
+ if (*in & 0x80) {
+ return -1;
+ }
+ *n |= *in++ << 7;
+ return 2;
+ }
+}
+
+/*
+ * helper to parse debug environment variables
+ *
+ * Reads the environment variable @name as a decimal number.
+ *
+ * Returns @initial when the variable is unset, does not start with a number,
+ * or holds a value outside [0, @max]; in the last case a warning is printed
+ * on stderr.  Otherwise returns the parsed value.
+ */
+int parse_debug_env(const char *name, int max, int initial)
+{
+    char *debug_env = getenv(name);
+    char *inv = NULL;
+    long debug;
+
+    if (!debug_env) {
+        return initial;
+    }
+    /*
+     * Keep the full long result for the range check: narrowing to int first
+     * could turn an oversized value into one that looks valid.
+     */
+    debug = strtol(debug_env, &inv, 10);
+    if (inv == debug_env) {
+        return initial;
+    }
+    if (debug < 0 || debug > max) {
+        /* Terminate the line so the warning does not run into later output. */
+        fprintf(stderr, "warning: %s not in [0, %d]\n", name, max);
+        return initial;
+    }
+    return (int)debug;
+}
diff --git a/contrib/qemu/util/error.c b/contrib/qemu/util/error.c
new file mode 100644
index 000000000..53b04354a
--- /dev/null
+++ b/contrib/qemu/util/error.c
@@ -0,0 +1,120 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2. See
+ * the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qjson.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi-types.h"
+#include "qapi/qmp/qerror.h"
+
+/* An error object: a heap-allocated message string plus its class. */
+struct Error
+{
+ char *msg; /* allocated with the g_strdup*() family, freed in error_free() */
+ ErrorClass err_class;
+};
+
+/*
+ * Create an Error of class @err_class whose message is the printf-style
+ * @fmt expanded with the remaining arguments, and store it in *@errp.
+ *
+ * Does nothing when @errp is NULL.  *@errp must be NULL on entry (checked
+ * with assert()).
+ */
+void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
+{
+ Error *err;
+ va_list ap;
+
+ if (errp == NULL) {
+ return;
+ }
+ assert(*errp == NULL);
+
+ err = g_malloc0(sizeof(*err));
+
+ va_start(ap, fmt);
+ err->msg = g_strdup_vprintf(fmt, ap);
+ va_end(ap);
+ err->err_class = err_class;
+
+ *errp = err;
+}
+
+/*
+ * Same as error_set(), except that when @os_errno is non-zero the message
+ * becomes "<formatted message>: <strerror(@os_errno)>".
+ *
+ * Does nothing when @errp is NULL.  *@errp must be NULL on entry (checked
+ * with assert()).
+ */
+void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,
+ const char *fmt, ...)
+{
+ Error *err;
+ char *msg1;
+ va_list ap;
+
+ if (errp == NULL) {
+ return;
+ }
+ assert(*errp == NULL);
+
+ err = g_malloc0(sizeof(*err));
+
+ va_start(ap, fmt);
+ msg1 = g_strdup_vprintf(fmt, ap);
+ if (os_errno != 0) {
+ /* msg1 is only an intermediate here; the combined string is kept. */
+ err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno));
+ g_free(msg1);
+ } else {
+ err->msg = msg1;
+ }
+ va_end(ap);
+ err->err_class = err_class;
+
+ *errp = err;
+}
+
+/*
+ * Report through error_setg_errno() that @filename could not be opened,
+ * passing @os_errno along as the errno value of the failure.
+ */
+void error_setg_file_open(Error **errp, int os_errno, const char *filename)
+{
+ error_setg_errno(errp, os_errno, "Could not open '%s'", filename);
+}
+
+/*
+ * Return a newly allocated copy of @err: the message is duplicated and the
+ * class is copied.  @err must not be NULL (it is dereferenced).
+ */
+Error *error_copy(const Error *err)
+{
+ Error *err_new;
+
+ /* sizeof(*err) and sizeof(*err_new) are the same: both are Error. */
+ err_new = g_malloc0(sizeof(*err));
+ err_new->msg = g_strdup(err->msg);
+ err_new->err_class = err->err_class;
+
+ return err_new;
+}
+
+/*
+ * Return true when @errp is non-NULL and *@errp holds an error.
+ */
+bool error_is_set(Error **errp)
+{
+ return (errp && *errp);
+}
+
+/*
+ * Return the class stored in @err, which must not be NULL.
+ */
+ErrorClass error_get_class(const Error *err)
+{
+ return err->err_class;
+}
+
+/*
+ * Return the message stored in @err, which must not be NULL.  The string
+ * is the one owned by @err; it is released by error_free().
+ */
+const char *error_get_pretty(Error *err)
+{
+ return err->msg;
+}
+
+/*
+ * Free @err together with its message.  Does nothing when @err is NULL.
+ */
+void error_free(Error *err)
+{
+ if (err) {
+ g_free(err->msg);
+ g_free(err);
+ }
+}
+
+/*
+ * Hand @local_err over to *@dst_err.
+ *
+ * When @dst_err is non-NULL and does not hold an error yet, *@dst_err takes
+ * ownership of @local_err.  Otherwise @local_err, if any, is freed.
+ */
+void error_propagate(Error **dst_err, Error *local_err)
+{
+ if (dst_err && !*dst_err) {
+ *dst_err = local_err;
+ } else if (local_err) {
+ error_free(local_err);
+ }
+}
diff --git a/contrib/qemu/util/hbitmap.c b/contrib/qemu/util/hbitmap.c
new file mode 100644
index 000000000..d93683128
--- /dev/null
+++ b/contrib/qemu/util/hbitmap.c
@@ -0,0 +1,402 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include <assert.h>
+#include "qemu/osdep.h"
+#include "qemu/hbitmap.h"
+#include "qemu/host-utils.h"
+#include "trace.h"
+
+/* HBitmaps provides an array of bits. The bits are stored as usual in an
+ * array of unsigned longs, but HBitmap is also optimized to provide fast
+ * iteration over set bits; going from one bit to the next is O(logB n)
+ * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
+ * that the number of levels is in fact fixed.
+ *
+ * In order to do this, it stacks multiple bitmaps with progressively coarser
+ * granularity; in all levels except the last, bit N is set iff the N-th
+ * unsigned long is nonzero in the immediately next level. When iteration
+ * completes on the last level it can examine the 2nd-last level to quickly
+ * skip entire words, and even do so recursively to skip blocks of 64 words or
+ * powers thereof (32 on 32-bit machines).
+ *
+ * Given an index in the bitmap, it can be split in group of bits like
+ * this (for the 64-bit case):
+ *
+ * bits 0-57 => word in the last bitmap | bits 58-63 => bit in the word
+ * bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
+ * bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
+ *
+ * So it is easy to move up simply by shifting the index right by
+ * log2(BITS_PER_LONG) bits. To move down, you shift the index left
+ * similarly, and add the word index within the group. Iteration uses
+ * ffs (find first set bit) to find the next word to examine; this
+ * operation can be done in constant time in most current architectures.
+ *
+ * Setting or clearing a range of m bits on all levels, the work to perform
+ * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
+ *
+ * When iterating on a bitmap, each bit (on any level) is only visited
+ * once. Hence, The total cost of visiting a bitmap with m bits in it is
+ * the number of bits that are set in all bitmaps. Unless the bitmap is
+ * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
+ * cost of advancing from one bit to the next is usually constant (worst case
+ * O(logB n) as in the non-amortized complexity).
+ */
+
+/* State of one hierarchical bitmap; see the overview comment above. */
+struct HBitmap {
+ /* Number of total bits in the bottom level. */
+ uint64_t size;
+
+ /* Number of set bits in the bottom level. */
+ uint64_t count;
+
+ /* A scaling factor. Given a granularity of G, each bit in the bitmap will
+ * actually represent a group of 2^G elements. Each operation on a
+ * range of bits first rounds the bits to determine which group they land
+ * in, and then affect the entire page; iteration will only visit the first
+ * bit of each group. Here is an example of operations in a size-16,
+ * granularity-1 HBitmap:
+ *
+ * initial state 00000000
+ * set(start=0, count=9) 11111000 (iter: 0, 2, 4, 6, 8)
+ * reset(start=1, count=3) 00111000 (iter: 4, 6, 8)
+ * set(start=9, count=2) 00111100 (iter: 4, 6, 8, 10)
+ * reset(start=5, count=5) 00000000
+ *
+ * From an implementation point of view, when setting or resetting bits,
+ * the bitmap will scale bit numbers right by this amount of bits. When
+ * iterating, the bitmap will scale bit numbers left by this amount of
+ * bits.
+ */
+ int granularity;
+
+ /* A number of progressively less coarse bitmaps (i.e. level 0 is the
+ * coarsest). Each bit in level N represents a word in level N+1 that
+ * has a set bit, except the last level where each bit represents the
+ * actual bitmap.
+ *
+ * Note that all bitmaps have the same number of levels. Even a 1-bit
+ * bitmap will still allocate HBITMAP_LEVELS arrays.
+ */
+ unsigned long *levels[HBITMAP_LEVELS];
+};
+
+/*
+ * Apply ctpop32() or ctpop64() to @l, whichever matches BITS_PER_LONG
+ * (presumably the number of set bits in @l -- confirm against those helpers).
+ */
+static inline int popcountl(unsigned long l)
+{
+ return BITS_PER_LONG == 32 ? ctpop32(l) : ctpop64(l);
+}
+
+/* Advance hbi to the next nonzero word and return it. hbi->pos
+ * is updated. Returns zero if we reach the end of the bitmap.
+ *
+ * The search first climbs towards level 0 until it finds a level whose
+ * pending word in hbi->cur[] is nonzero, then walks back down to the last
+ * level, refreshing hbi->cur[] for each level it passes through.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
+{
+ size_t pos = hbi->pos;
+ const HBitmap *hb = hbi->hb;
+ unsigned i = HBITMAP_LEVELS - 1;
+
+ unsigned long cur;
+ /* Climb: one level up per iteration until a pending word is nonzero. */
+ do {
+ cur = hbi->cur[--i];
+ pos >>= BITS_PER_LEVEL;
+ } while (cur == 0);
+
+ /* Check for end of iteration. We always use fewer than BITS_PER_LONG
+ * bits in the level 0 bitmap; thus we can repurpose the most significant
+ * bit as a sentinel. The sentinel is set in hbitmap_alloc and ensures
+ * that the above loop ends even without an explicit check on i.
+ */
+
+ if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
+ return 0;
+ }
+ for (; i < HBITMAP_LEVELS - 1; i++) {
+ /* Shift back pos to the left, matching the right shifts above.
+ * The index of this word's least significant set bit provides
+ * the low-order bits.
+ */
+ assert(cur);
+ pos = (pos << BITS_PER_LEVEL) + ctzl(cur);
+ /* cur & (cur - 1) clears the lowest set bit: it is being consumed now. */
+ hbi->cur[i] = cur & (cur - 1);
+
+ /* Set up next level for iteration. */
+ cur = hb->levels[i + 1][pos];
+ }
+
+ hbi->pos = pos;
+ trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);
+
+ assert(cur);
+ return cur;
+}
+
+/*
+ * Prepare @hbi for iterating over @hb, starting at item @first.
+ *
+ * @first is scaled down by the granularity of @hb; the scaled position must
+ * be below hb->size (checked with assert()).
+ */
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
+{
+ unsigned i, bit;
+ uint64_t pos;
+
+ hbi->hb = hb;
+ pos = first >> hb->granularity;
+ assert(pos < hb->size);
+ hbi->pos = pos >> BITS_PER_LEVEL;
+ hbi->granularity = hb->granularity;
+
+ /* Walk from the last (finest) level up to level 0. */
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ bit = pos & (BITS_PER_LONG - 1);
+ pos >>= BITS_PER_LEVEL;
+
+ /* Drop bits representing items before first. */
+ hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);
+
+ /* We have already added level i+1, so the lowest set bit has
+ * been processed. Clear it.
+ */
+ if (i != HBITMAP_LEVELS - 1) {
+ hbi->cur[i] &= ~(1UL << bit);
+ }
+ }
+}
+
+/*
+ * Return true when the set-bit counter of @hb is zero.
+ */
+bool hbitmap_empty(const HBitmap *hb)
+{
+ return hb->count == 0;
+}
+
+/*
+ * Return the granularity (the scaling shift) of @hb.
+ */
+int hbitmap_granularity(const HBitmap *hb)
+{
+ return hb->granularity;
+}
+
+/*
+ * Return the set-bit counter of @hb scaled up by its granularity, i.e. the
+ * counter shifted left by hb->granularity.
+ */
+uint64_t hbitmap_count(const HBitmap *hb)
+{
+ return hb->count << hb->granularity;
+}
+
+/* Count the number of set bits between start and end, not accounting for
+ * the granularity. Also an example of how to use hbitmap_iter_next_word.
+ *
+ * @start and @last are positions in the last level (already scaled by the
+ * granularity); @last is inclusive, end below is the exclusive bound.
+ */
+static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
+{
+ HBitmapIter hbi;
+ uint64_t count = 0;
+ uint64_t end = last + 1;
+ unsigned long cur;
+ size_t pos;
+
+ /* hbitmap_iter_init() expects an unscaled item number, so scale back up. */
+ hbitmap_iter_init(&hbi, hb, start << hb->granularity);
+ for (;;) {
+ pos = hbitmap_iter_next_word(&hbi, &cur);
+ /* Stop at the word that contains end, or at any word beyond it. */
+ if (pos >= (end >> BITS_PER_LEVEL)) {
+ break;
+ }
+ count += popcountl(cur);
+ }
+
+ if (pos == (end >> BITS_PER_LEVEL)) {
+ /* Drop bits representing the END-th and subsequent items. */
+ int bit = end & (BITS_PER_LONG - 1);
+ cur &= (1UL << bit) - 1;
+ count += popcountl(cur);
+ }
+
+ return count;
+}
+
+/* Setting starts at the last layer and propagates up if an element
+ * changes from zero to non-zero.
+ *
+ * Sets bits @start..@last (inclusive) in the word *@elem; both positions
+ * must fall in the same word and @start must not exceed @last (asserted).
+ * Returns true when *@elem was zero before the update.
+ */
+static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+ unsigned long mask;
+ bool changed;
+
+ assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+ assert(start <= last);
+
+ /* (2 << last_bit) - (1 << start_bit): ones from start_bit to last_bit. */
+ mask = 2UL << (last & (BITS_PER_LONG - 1));
+ mask -= 1UL << (start & (BITS_PER_LONG - 1));
+ changed = (*elem == 0);
+ *elem |= mask;
+ return changed;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...
+ *
+ * Sets bits @start..@last (inclusive) in level @level of @hb.  When any word
+ * touched was zero before, the corresponding range of word indexes is set
+ * in level @level - 1 as well.
+ */
+static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+ size_t pos = start >> BITS_PER_LEVEL;
+ size_t lastpos = last >> BITS_PER_LEVEL;
+ bool changed = false;
+ size_t i;
+
+ i = pos;
+ if (i < lastpos) {
+ /* The range spans several words: finish the first one up to its end. */
+ uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+ changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
+ for (;;) {
+ start = next;
+ next += BITS_PER_LONG;
+ if (++i == lastpos) {
+ break;
+ }
+ /* Words strictly between the first and the last are set entirely. */
+ changed |= (hb->levels[level][i] == 0);
+ hb->levels[level][i] = ~0UL;
+ }
+ }
+ /* Last word of the range (also the only one when pos == lastpos). */
+ changed |= hb_set_elem(&hb->levels[level][i], start, last);
+
+ /* If there was any change in this layer, we may have to update
+ * the one above.
+ */
+ if (level > 0 && changed) {
+ hb_set_between(hb, level - 1, pos, lastpos);
+ }
+}
+
+/* Set the bits covering items start .. start + count - 1 and add the
+ * number of newly set bits to the cached count.
+ */
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    /* Compute range in the last layer. */
+    const uint64_t last = start + count - 1;
+    const uint64_t first_bit = start >> hb->granularity;
+    const uint64_t last_bit = last >> hb->granularity;
+    const uint64_t nbits = last_bit - first_bit + 1;
+
+    trace_hbitmap_set(hb, start, count, first_bit, last_bit);
+
+    /* Only bits that are not already set increase the count. */
+    hb->count += nbits - hb_count_between(hb, first_bit, last_bit);
+    hb_set_between(hb, HBITMAP_LEVELS - 1, first_bit, last_bit);
+}
+
+/* Resetting works the other way round: propagate up if the new
+ * value is zero.
+ *
+ * Clears bits start..last (inclusive, both inside the same word) in *elem
+ * and returns true when *elem was non-zero before and is zero afterwards.
+ */
+static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+    unsigned long mask;
+    unsigned long before;
+    unsigned long after;
+
+    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+    assert(start <= last);
+
+    /* Ones from bit (start % word size) up to bit (last % word size). */
+    mask = (2UL << (last & (BITS_PER_LONG - 1)))
+         - (1UL << (start & (BITS_PER_LONG - 1)));
+
+    before = *elem;
+    after = before & ~mask;
+    *elem = after;
+    return before != 0 && after == 0;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS):
+ * clear bits start..last (inclusive) in the given level, then recurse into
+ * level - 1 for the words of this level that became entirely zero.
+ */
+static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+    size_t pos = start >> BITS_PER_LEVEL;       /* first word touched */
+    size_t lastpos = last >> BITS_PER_LEVEL;    /* last word touched */
+    bool changed = false;
+    size_t i;
+
+    i = pos;
+    if (i < lastpos) {
+        /* next is the first bit of the word following the first one. */
+        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+
+        /* Here we need a more complex test than when setting bits.  Even if
+         * something was changed, we must not blank bits in the upper level
+         * unless the lower-level word became entirely zero.  So, remove pos
+         * from the upper-level range if bits remain set.
+         */
+        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
+            changed = true;
+        } else {
+            pos++;
+        }
+
+        for (;;) {
+            start = next;
+            next += BITS_PER_LONG;
+            if (++i == lastpos) {
+                break;
+            }
+            /* Words strictly between the first and the last are zeroed
+             * outright; that is a change only if they held any bit.
+             */
+            changed |= (hb->levels[level][i] != 0);
+            hb->levels[level][i] = 0UL;
+        }
+    }
+
+    /* Same as above, this time for lastpos. */
+    if (hb_reset_elem(&hb->levels[level][i], start, last)) {
+        changed = true;
+    } else {
+        lastpos--;
+    }
+
+    /* pos..lastpos, possibly narrowed above, is the bit range to clear
+     * one level up.
+     */
+    if (level > 0 && changed) {
+        hb_reset_between(hb, level - 1, pos, lastpos);
+    }
+}
+
+/* Clear the bits covering items start .. start + count - 1 and subtract
+ * the number of bits that were set from the cached count.
+ */
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    /* Compute range in the last layer. */
+    const uint64_t last = start + count - 1;
+    const uint64_t first_bit = start >> hb->granularity;
+    const uint64_t last_bit = last >> hb->granularity;
+
+    trace_hbitmap_reset(hb, start, count, first_bit, last_bit);
+
+    hb->count -= hb_count_between(hb, first_bit, last_bit);
+    hb_reset_between(hb, HBITMAP_LEVELS - 1, first_bit, last_bit);
+}
+
+/* Test the last-layer bit that covers ITEM. */
+bool hbitmap_get(const HBitmap *hb, uint64_t item)
+{
+    /* Compute position and bit in the last layer. */
+    const uint64_t idx = item >> hb->granularity;
+    const unsigned long word =
+        hb->levels[HBITMAP_LEVELS - 1][idx >> BITS_PER_LEVEL];
+
+    return (word >> (idx & (BITS_PER_LONG - 1))) & 1UL;
+}
+
+/* Release every level array, highest index first, then the bitmap itself. */
+void hbitmap_free(HBitmap *hb)
+{
+    unsigned level = HBITMAP_LEVELS;
+
+    while (level > 0) {
+        level--;
+        g_free(hb->levels[level]);
+    }
+    g_free(hb);
+}
+
+/* Allocate a zeroed bitmap for SIZE items, one bit per 2^GRANULARITY items.
+ * Each level is sized as one bit per word of the level below it.
+ */
+HBitmap *hbitmap_alloc(uint64_t size, int granularity)
+{
+    HBitmap *hb = g_malloc0(sizeof(struct HBitmap));
+    unsigned level;
+
+    assert(granularity >= 0 && granularity < 64);
+    /* Round the item count up to whole granules. */
+    size = (size + (1ULL << granularity) - 1) >> granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+
+    hb->granularity = granularity;
+    hb->size = size;
+
+    level = HBITMAP_LEVELS;
+    while (level > 0) {
+        level--;
+        /* Words needed to hold SIZE bits, never fewer than one. */
+        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        hb->levels[level] = g_malloc0(size * sizeof(unsigned long));
+    }
+
+    /* We necessarily have free bits in level 0 due to the definition
+     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up
+     * hbitmap_iter_skip_words.
+     */
+    assert(size == 1);
+    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+    return hb;
+}
diff --git a/contrib/qemu/util/hexdump.c b/contrib/qemu/util/hexdump.c
new file mode 100644
index 000000000..969b3406c
--- /dev/null
+++ b/contrib/qemu/util/hexdump.c
@@ -0,0 +1,37 @@
+/*
+ * Helper to hexdump a buffer
+ *
+ * Copyright (c) 2013 Red Hat, Inc.
+ * Copyright (c) 2013 Gerd Hoffmann <kraxel@redhat.com>
+ * Copyright (c) 2013 Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+ * Copyright (c) 2013 Xilinx, Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+
+/*
+ * Write a hex dump of the first `size' bytes of `buf' to `fp'.
+ *
+ * Each output line starts with "<prefix>: <offset>:" (offset in hex, at
+ * least four digits) followed by up to 16 bytes in groups of four.  A
+ * trailing newline is added when the last line is incomplete.
+ */
+void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size)
+{
+    /* Same width as `size', so the counter cannot wrap before reaching it. */
+    size_t b;
+
+    for (b = 0; b < size; b++) {
+        if ((b % 16) == 0) {
+            fprintf(fp, "%s: %04zx:", prefix, b);
+        }
+        if ((b % 4) == 0) {
+            fprintf(fp, " ");
+        }
+        fprintf(fp, " %02x", (unsigned char)buf[b]);
+        if ((b % 16) == 15) {
+            fprintf(fp, "\n");
+        }
+    }
+    if ((b % 16) != 0) {
+        fprintf(fp, "\n");
+    }
+}
diff --git a/contrib/qemu/util/iov.c b/contrib/qemu/util/iov.c
new file mode 100644
index 000000000..cc6e837c8
--- /dev/null
+++ b/contrib/qemu/util/iov.c
@@ -0,0 +1,426 @@
+/*
+ * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ *
+ * Copyright IBM, Corp. 2007, 2008
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Amit Shah <amit.shah@redhat.com>
+ * Michael Tokarev <mjt@tls.msk.ru>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/iov.h"
+
+#ifdef _WIN32
+# include <windows.h>
+# include <winsock2.h>
+#else
+# include <sys/types.h>
+# include <sys/socket.h>
+#endif
+
+/*
+ * Copy up to `bytes' bytes from `buf' into the vector, starting `offset'
+ * bytes into it.  Returns the number of bytes copied, which is less than
+ * `bytes' if the vector ends first.  `offset' must lie within the vector.
+ */
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
+                    size_t offset, const void *buf, size_t bytes)
+{
+    const char *src = buf;
+    size_t copied = 0;
+    unsigned int idx = 0;
+
+    while ((offset || copied < bytes) && idx < iov_cnt) {
+        const struct iovec *vec = &iov[idx++];
+        size_t chunk;
+
+        if (offset >= vec->iov_len) {
+            /* Still skipping whole elements to reach the start position. */
+            offset -= vec->iov_len;
+            continue;
+        }
+        chunk = MIN(vec->iov_len - offset, bytes - copied);
+        memcpy((char *)vec->iov_base + offset, src + copied, chunk);
+        copied += chunk;
+        offset = 0;
+    }
+    assert(offset == 0);
+    return copied;
+}
+
+/*
+ * Copy up to `bytes' bytes out of the vector into `buf', starting `offset'
+ * bytes into the vector.  Returns the number of bytes copied, which is less
+ * than `bytes' if the vector ends first.  `offset' must lie within the
+ * vector.
+ */
+size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
+                  size_t offset, void *buf, size_t bytes)
+{
+    char *dst = buf;
+    size_t copied = 0;
+    unsigned int idx;
+
+    for (idx = 0; idx < iov_cnt && (offset || copied < bytes); idx++) {
+        const size_t avail = iov[idx].iov_len;
+
+        if (offset < avail) {
+            const size_t chunk = MIN(avail - offset, bytes - copied);
+
+            memcpy(dst + copied, (const char *)iov[idx].iov_base + offset,
+                   chunk);
+            copied += chunk;
+            offset = 0;
+        } else {
+            /* Element lies entirely before the start position. */
+            offset -= avail;
+        }
+    }
+    assert(offset == 0);
+    return copied;
+}
+
+/*
+ * Fill up to `bytes' bytes of the vector with `fillc', starting `offset'
+ * bytes into it.  Returns the number of bytes filled, which is less than
+ * `bytes' if the vector ends first.  `offset' must lie within the vector.
+ */
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+                  size_t offset, int fillc, size_t bytes)
+{
+    size_t filled = 0;
+    unsigned int idx = 0;
+
+    while (idx < iov_cnt && (offset || filled < bytes)) {
+        const struct iovec *vec = &iov[idx];
+
+        idx++;
+        if (offset >= vec->iov_len) {
+            /* Element lies entirely before the start position. */
+            offset -= vec->iov_len;
+        } else {
+            const size_t chunk = MIN(vec->iov_len - offset, bytes - filled);
+
+            memset((char *)vec->iov_base + offset, fillc, chunk);
+            filled += chunk;
+            offset = 0;
+        }
+    }
+    assert(offset == 0);
+    return filled;
+}
+
+/* Return the sum of the lengths of the first `iov_cnt' elements. */
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
+{
+    size_t total = 0;
+    unsigned int idx = 0;
+
+    while (idx < iov_cnt) {
+        total += iov[idx++].iov_len;
+    }
+    return total;
+}
+
+/*
+ * Helper function for iov_send_recv(): send (do_send) or receive the
+ * elements of `iov' on `sockfd'.
+ *
+ * Returns the number of bytes transferred, or -1 on error.  In the
+ * piece-by-piece variant an error after some data was transferred reports
+ * the byte count instead of -1.
+ */
+static ssize_t
+do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+{
+#ifdef CONFIG_POSIX
+    ssize_t ret;
+    struct msghdr msg;
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = iov_cnt;
+    do {
+        ret = do_send
+            ? sendmsg(sockfd, &msg, 0)
+            : recvmsg(sockfd, &msg, 0);
+    } while (ret < 0 && errno == EINTR);
+    return ret;
+#else
+    /* else send piece-by-piece */
+    /*XXX Note: windows has WSASend() and WSARecv() */
+    unsigned i = 0;
+    size_t off = 0;     /* bytes of iov[i] already transferred */
+    ssize_t ret = 0;
+    while (i < iov_cnt) {
+        char *base = (char *)iov[i].iov_base + off;
+        size_t len = iov[i].iov_len - off;
+        ssize_t r = do_send
+            ? send(sockfd, base, len, 0)
+            : recv(sockfd, base, len, 0);
+        if (r > 0) {
+            ret += r;
+            off += r;
+            if (off < iov[i].iov_len) {
+                /* Short transfer: finish this element before moving on,
+                 * otherwise the byte stream would get a hole in it. */
+                continue;
+            }
+        } else if (!r) {
+            break;
+        } else if (errno == EINTR) {
+            continue;
+        } else {
+            /* else it is some "other" error,
+             * only return if there was no data processed. */
+            if (ret == 0) {
+                ret = -1;
+            }
+            break;
+        }
+        off = 0;
+        i++;
+    }
+    return ret;
+#endif
+}
+
+/*
+ * Send (do_send) or receive `bytes' bytes on `sockfd', using the part of
+ * the vector that starts `offset' bytes into it.  The vector is modified
+ * temporarily while a transfer is in progress and restored afterwards.
+ *
+ * Returns the number of bytes transferred.  That can be less than `bytes'
+ * when EAGAIN is hit after some data went through, or when receiving and
+ * the peer has closed the connection.  Returns -1 on any other error.
+ */
+ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                      size_t offset, size_t bytes,
+                      bool do_send)
+{
+    ssize_t total = 0;
+    ssize_t ret;
+    size_t orig_len = 0, tail;
+    unsigned niov;
+
+    while (bytes > 0) {
+        /* Find the start position, skipping `offset' bytes:
+         * first, skip all full-sized vector elements, */
+        for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) {
+            offset -= iov[niov].iov_len;
+        }
+
+        /* niov == iov_cnt would only be valid if bytes == 0, which
+         * we already ruled out in the loop condition.  */
+        assert(niov < iov_cnt);
+        iov += niov;
+        iov_cnt -= niov;
+
+        if (offset) {
+            /* second, skip `offset' bytes from the (now) first element,
+             * undo it on exit */
+            iov[0].iov_base += offset;
+            iov[0].iov_len -= offset;
+        }
+        /* Find the end position skipping `bytes' bytes: */
+        /* first, skip all full-sized elements */
+        tail = bytes;
+        for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) {
+            tail -= iov[niov].iov_len;
+        }
+        if (tail) {
+            /* second, fixup the last element, and remember the original
+             * length */
+            assert(niov < iov_cnt);
+            assert(iov[niov].iov_len > tail);
+            orig_len = iov[niov].iov_len;
+            iov[niov++].iov_len = tail;
+        }
+
+        ret = do_send_recv(sockfd, iov, niov, do_send);
+
+        /* Undo the changes above before checking for errors */
+        if (tail) {
+            iov[niov-1].iov_len = orig_len;
+        }
+        if (offset) {
+            iov[0].iov_base -= offset;
+            iov[0].iov_len += offset;
+        }
+
+        if (ret < 0) {
+            assert(errno != EINTR);
+            if (errno == EAGAIN && total > 0) {
+                return total;
+            }
+            return -1;
+        }
+
+        if (ret == 0 && !do_send) {
+            /* A receive of 0 bytes means the peer has shut the connection
+             * down; nothing more will arrive, so looping again would never
+             * make progress. */
+            break;
+        }
+
+        /* Prepare for the next iteration */
+        offset += ret;
+        total += ret;
+        bytes -= ret;
+    }
+
+    return total;
+}
+
+
+/*
+ * Hex-dump the contents of the vector to `fp', at most `limit' bytes.
+ * The data is linearized into a temporary buffer and passed to
+ * qemu_hexdump() together with `prefix'.
+ */
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+                 FILE *fp, const char *prefix, size_t limit)
+{
+    /* iov_size() counts with an unsigned index that matches iov_cnt. */
+    size_t size = iov_size(iov, iov_cnt);
+    char *buf;
+
+    if (size > limit) {
+        size = limit;
+    }
+    buf = g_malloc(size);
+    iov_to_buf(iov, iov_cnt, 0, buf, size);
+    qemu_hexdump(buf, fp, prefix, size);
+    g_free(buf);
+}
+
+/*
+ * Fill `dst_iov' with elements that reference (not copy) the `bytes' bytes
+ * of `iov' starting at `offset'.  Stops early when either vector runs out.
+ * Returns the number of `dst_iov' elements written.
+ */
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+                  const struct iovec *iov, unsigned int iov_cnt,
+                  size_t offset, size_t bytes)
+{
+    unsigned int src = 0;
+    unsigned int used = 0;
+
+    while (src < iov_cnt && used < dst_iov_cnt && bytes) {
+        const struct iovec *from = &iov[src++];
+
+        if (offset < from->iov_len) {
+            const size_t chunk = MIN(bytes, from->iov_len - offset);
+
+            dst_iov[used].iov_base = (char *)from->iov_base + offset;
+            dst_iov[used].iov_len = chunk;
+            used++;
+            bytes -= chunk;
+            offset = 0;
+        } else {
+            /* Element lies entirely before the start position. */
+            offset -= from->iov_len;
+        }
+    }
+    assert(offset == 0);
+    return used;
+}
+
+/* io vectors */
+
+/* Set up an empty vector with room for `alloc_hint' elements. */
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
+{
+    struct iovec *slots = g_malloc(alloc_hint * sizeof(struct iovec));
+
+    qiov->iov = slots;
+    qiov->nalloc = alloc_hint;
+    qiov->niov = 0;
+    qiov->size = 0;
+}
+
+/*
+ * Wrap a caller-provided array of `niov' elements.  nalloc is set to -1,
+ * the value the growing/freeing functions assert against.
+ */
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
+{
+    int idx = 0;
+
+    qiov->iov = iov;
+    qiov->niov = niov;
+    qiov->nalloc = -1;
+    qiov->size = 0;
+    while (idx < niov) {
+        qiov->size += iov[idx].iov_len;
+        idx++;
+    }
+}
+
+/*
+ * Append the buffer (base, len) as a new element, growing the element
+ * array to 2 * nalloc + 1 entries when it is full.
+ */
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
+{
+    struct iovec *slot;
+
+    assert(qiov->nalloc != -1);
+
+    if (qiov->nalloc == qiov->niov) {
+        qiov->nalloc = 2 * qiov->nalloc + 1;
+        qiov->iov = g_realloc(qiov->iov, qiov->nalloc * sizeof(struct iovec));
+    }
+
+    slot = &qiov->iov[qiov->niov];
+    slot->iov_base = base;
+    slot->iov_len = len;
+
+    qiov->niov++;
+    qiov->size += len;
+}
+
+/*
+ * Concatenates (partial) iovecs from src_iov to the end of dst.
+ * It skips `soffset' bytes at the beginning of src_iov and then adds
+ * individual vectors from src_iov to dst, covering up to `sbytes' bytes
+ * in total or up to the end of src_iov, whichever comes first.  This way,
+ * it is okay to specify a very large value for `sbytes' to indicate
+ * "up to the end of src_iov".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat_iov(QEMUIOVector *dst,
+                           struct iovec *src_iov, unsigned int src_cnt,
+                           size_t soffset, size_t sbytes)
+{
+    size_t added = 0;
+    int idx;
+
+    if (sbytes == 0) {
+        return;
+    }
+    assert(dst->nalloc != -1);
+
+    for (idx = 0; added < sbytes && idx < src_cnt; idx++) {
+        const struct iovec *from = &src_iov[idx];
+        size_t chunk;
+
+        if (soffset >= from->iov_len) {
+            /* Element lies entirely before the start position. */
+            soffset -= from->iov_len;
+            continue;
+        }
+        chunk = MIN(from->iov_len - soffset, sbytes - added);
+        qemu_iovec_add(dst, from->iov_base + soffset, chunk);
+        added += chunk;
+        soffset = 0;
+    }
+    assert(soffset == 0); /* offset beyond end of src */
+}
+
+/*
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It skips `soffset' bytes at the beginning of src and then adds
+ * individual vectors from src to dst, covering up to `sbytes' bytes
+ * in total or up to the end of src, whichever comes first.  This way,
+ * it is okay to specify a very large value for `sbytes' to indicate
+ * "up to the end of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst,
+                       QEMUIOVector *src, size_t soffset, size_t sbytes)
+{
+    struct iovec *src_iov = src->iov;
+
+    qemu_iovec_concat_iov(dst, src_iov, src->niov, soffset, sbytes);
+}
+
+/*
+ * Empty the vector and free its element array.  Must not be used on a
+ * vector whose nalloc is -1 (see qemu_iovec_init_external()).
+ */
+void qemu_iovec_destroy(QEMUIOVector *qiov)
+{
+    assert(qiov->nalloc != -1);
+
+    qemu_iovec_reset(qiov);
+
+    g_free(qiov->iov);
+    qiov->iov = NULL;
+    qiov->nalloc = 0;
+}
+
+/* Forget all elements but keep the allocated element array for reuse. */
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+    assert(qiov->nalloc != -1);
+
+    qiov->size = 0;
+    qiov->niov = 0;
+}
+
+/* iov_to_buf() applied to the elements held by `qiov'. */
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+                         void *buf, size_t bytes)
+{
+    size_t copied = iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+
+    return copied;
+}
+
+/* iov_from_buf() applied to the elements held by `qiov'. */
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+                           const void *buf, size_t bytes)
+{
+    size_t copied = iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+
+    return copied;
+}
+
+/* iov_memset() applied to the elements held by `qiov'. */
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+                         int fillc, size_t bytes)
+{
+    size_t filled = iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
+
+    return filled;
+}
+
+/*
+ * Drop `bytes' bytes from the front of the vector.  Elements that are
+ * consumed entirely are skipped by advancing *iov and decrementing
+ * *iov_cnt; a partially consumed element has its base and length adjusted.
+ * Returns the number of bytes dropped, which is less than `bytes' if the
+ * vector is shorter.
+ */
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+                         size_t bytes)
+{
+    struct iovec *vec = *iov;
+    size_t dropped = 0;
+
+    while (*iov_cnt > 0) {
+        if (bytes < vec->iov_len) {
+            /* The cut falls inside this element: trim its head and stop. */
+            vec->iov_base = (char *)vec->iov_base + bytes;
+            vec->iov_len -= bytes;
+            dropped += bytes;
+            break;
+        }
+
+        dropped += vec->iov_len;
+        bytes -= vec->iov_len;
+        --*iov_cnt;
+        vec++;
+    }
+
+    *iov = vec;
+    return dropped;
+}
+
+/*
+ * Drop `bytes' bytes from the back of the vector.  Elements that are
+ * consumed entirely are removed by decrementing *iov_cnt; a partially
+ * consumed element has its length reduced.  Returns the number of bytes
+ * dropped, which is less than `bytes' if the vector is shorter.
+ */
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+                        size_t bytes)
+{
+    size_t total = 0;
+
+    while (*iov_cnt > 0) {
+        /* Address the current last element through its index, so that no
+         * pointer in front of the array is ever formed once element 0 has
+         * been consumed. */
+        struct iovec *cur = &iov[*iov_cnt - 1];
+
+        if (cur->iov_len > bytes) {
+            cur->iov_len -= bytes;
+            total += bytes;
+            break;
+        }
+
+        bytes -= cur->iov_len;
+        total += cur->iov_len;
+        *iov_cnt -= 1;
+    }
+
+    return total;
+}
diff --git a/contrib/qemu/util/module.c b/contrib/qemu/util/module.c
new file mode 100644
index 000000000..7acc33d07
--- /dev/null
+++ b/contrib/qemu/util/module.c
@@ -0,0 +1,81 @@
+/*
+ * QEMU Module Infrastructure
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/module.h"
+
+/* One registered init callback, linked into the list for its init type. */
+typedef struct ModuleEntry
+{
+    void (*init)(void);             /* callback run by module_call_init() */
+    QTAILQ_ENTRY(ModuleEntry) node; /* list linkage */
+} ModuleEntry;
+
+typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;
+
+/* One list per module_init_type value; find_type() indexes it by type. */
+static ModuleTypeList init_type_list[MODULE_INIT_MAX];
+
+/* Initialize every list in init_type_list; only the first call does work. */
+static void init_types(void)
+{
+    static int inited;
+    int type;
+
+    if (!inited) {
+        for (type = 0; type < MODULE_INIT_MAX; type++) {
+            QTAILQ_INIT(&init_type_list[type]);
+        }
+        inited = 1;
+    }
+}
+
+
+/* Return the list for `type', initializing the lists on first use. */
+static ModuleTypeList *find_type(module_init_type type)
+{
+    init_types();
+
+    return &init_type_list[type];
+}
+
+/* Append `fn' to the list of init callbacks registered for `type'. */
+void register_module_init(void (*fn)(void), module_init_type type)
+{
+    ModuleTypeList *list;
+    ModuleEntry *entry = g_malloc0(sizeof(*entry));
+
+    entry->init = fn;
+    list = find_type(type);
+
+    QTAILQ_INSERT_TAIL(list, entry, node);
+}
+
+/* Run every callback registered for `type', in list order. */
+void module_call_init(module_init_type type)
+{
+    ModuleTypeList *list = find_type(type);
+    ModuleEntry *entry;
+
+    QTAILQ_FOREACH(entry, list, node) {
+        entry->init();
+    }
+}
diff --git a/contrib/qemu/util/oslib-posix.c b/contrib/qemu/util/oslib-posix.c
new file mode 100644
index 000000000..3dc8b1b07
--- /dev/null
+++ b/contrib/qemu/util/oslib-posix.c
@@ -0,0 +1,243 @@
+/*
+ * os-posix-lib.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* The following block of code temporarily renames the daemon() function so the
+ compiler does not see the warning associated with it in stdlib.h on OSX */
+#ifdef __APPLE__
+#define daemon qemu_fake_daemon_function
+#include <stdlib.h>
+#undef daemon
+extern int daemon(int, int);
+#endif
+
+#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
+ /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
+ Valgrind does not support alignments larger than 1 MiB,
+ therefore we need special code which handles running on Valgrind. */
+# define QEMU_VMALLOC_ALIGN (512 * 4096)
+#elif defined(__linux__) && defined(__s390x__)
+ /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+# define QEMU_VMALLOC_ALIGN (256 * 4096)
+#else
+# define QEMU_VMALLOC_ALIGN getpagesize()
+#endif
+
+#include <glib/gprintf.h>
+
+#include "config-host.h"
+#include "sysemu/sysemu.h"
+#include "trace.h"
+#include "qemu/sockets.h"
+#include <sys/mman.h>
+
+#ifdef CONFIG_LINUX
+#include <sys/syscall.h>
+#endif
+
+/* Return the gettid() syscall result on Linux, getpid() elsewhere. */
+int qemu_get_thread_id(void)
+{
+    int id;
+
+#if defined(__linux__)
+    id = syscall(SYS_gettid);
+#else
+    id = getpid();
+#endif
+    return id;
+}
+
+/* Thin wrapper that forwards both arguments to daemon(). */
+int qemu_daemon(int nochdir, int noclose)
+{
+    int rc = daemon(nochdir, noclose);
+
+    return rc;
+}
+
+/*
+ * Pass `ptr' through unchanged when it is non-NULL; otherwise report the
+ * current errno on stderr and abort.
+ */
+void *qemu_oom_check(void *ptr)
+{
+    if (ptr != NULL) {
+        return ptr;
+    }
+
+    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
+    abort();
+}
+
+/*
+ * Allocate `size' bytes, aligned to `alignment' where the selected
+ * allocator supports it.  Allocation failure aborts the process instead of
+ * returning: directly in the posix_memalign() branch, through
+ * qemu_oom_check() in the other two.
+ */
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
+    int ret;
+    ret = posix_memalign(&ptr, alignment, size);
+    if (ret != 0) {
+        /* posix_memalign() hands back the error code as its return value. */
+        fprintf(stderr, "Failed to allocate %zu B: %s\n",
+                size, strerror(ret));
+        abort();
+    }
+#elif defined(CONFIG_BSD)
+    /* NOTE: this branch does not pass `alignment' on to valloc(). */
+    ptr = qemu_oom_check(valloc(size));
+#else
+    ptr = qemu_oom_check(memalign(alignment, size));
+#endif
+    trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+/*
+ * Map `size' bytes of anonymous private memory whose start is aligned to
+ * QEMU_VMALLOC_ALIGN.  The mapping is made larger than requested and the
+ * unused head and tail are unmapped again.  Aborts if mmap() fails.
+ */
+void *qemu_anon_ram_alloc(size_t size)
+{
+    const size_t align = QEMU_VMALLOC_ALIGN;
+    size_t total = size + align - getpagesize();
+    size_t offset;
+    void *base;
+    void *ptr;
+
+    base = mmap(0, total, PROT_READ | PROT_WRITE,
+                MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+    if (base == MAP_FAILED) {
+        fprintf(stderr, "Failed to allocate %zu B: %s\n",
+                size, strerror(errno));
+        abort();
+    }
+
+    /* Distance from the mapping start to the next aligned address. */
+    offset = QEMU_ALIGN_UP((uintptr_t)base, align) - (uintptr_t)base;
+    ptr = base + offset;
+    total -= offset;
+
+    if (offset > 0) {
+        munmap(base, offset);
+    }
+    if (total > size) {
+        munmap(ptr + size, total - size);
+    }
+
+    trace_qemu_anon_ram_alloc(size, ptr);
+    return ptr;
+}
+
+/* Trace the pointer, then hand it to free(). */
+void qemu_vfree(void *ptr)
+{
+    trace_qemu_vfree(ptr);
+
+    free(ptr);
+}
+
+/* Unmap `size' bytes at `ptr'; a NULL `ptr' is traced but otherwise ignored. */
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+    trace_qemu_anon_ram_free(ptr, size);
+
+    if (!ptr) {
+        return;
+    }
+    munmap(ptr, size);
+}
+
+/*
+ * Clear O_NONBLOCK on `fd'.  Best effort: when the current flags cannot be
+ * read, the descriptor is left untouched.
+ */
+void qemu_set_block(int fd)
+{
+    int f = fcntl(fd, F_GETFL);
+
+    if (f == -1) {
+        /* -1 has every bit set and must not be written back as flags. */
+        return;
+    }
+    fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+}
+
+/*
+ * Set O_NONBLOCK on `fd'.  Best effort: when the current flags cannot be
+ * read, the descriptor is left untouched.
+ */
+void qemu_set_nonblock(int fd)
+{
+    int f = fcntl(fd, F_GETFL);
+
+    if (f == -1) {
+        /* -1 has every bit set and must not be written back as flags. */
+        return;
+    }
+    fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+/*
+ * Set FD_CLOEXEC on `fd'.  Best effort: when the current descriptor flags
+ * cannot be read, the descriptor is left untouched.
+ */
+void qemu_set_cloexec(int fd)
+{
+    int f = fcntl(fd, F_GETFD);
+
+    if (f == -1) {
+        /* -1 has every bit set and must not be written back as flags. */
+        return;
+    }
+    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+}
+
+/*
+ * Creates a pipe with FD_CLOEXEC set on both file descriptors.
+ * pipe2() is tried first when CONFIG_PIPE2 is defined; plain pipe()
+ * followed by qemu_set_cloexec() is used if pipe2() reports ENOSYS or is
+ * not configured.  Returns what the underlying pipe call returned.
+ */
+int qemu_pipe(int pipefd[2])
+{
+    int ret;
+
+#ifdef CONFIG_PIPE2
+    ret = pipe2(pipefd, O_CLOEXEC);
+    if (!(ret == -1 && errno == ENOSYS)) {
+        return ret;
+    }
+#endif
+    ret = pipe(pipefd);
+    if (ret != 0) {
+        return ret;
+    }
+
+    qemu_set_cloexec(pipefd[0]);
+    qemu_set_cloexec(pipefd[1]);
+    return ret;
+}
+
+/*
+ * Set the access (times[0]) and modification (times[1]) time of `path'.
+ *
+ * utimensat() is used when CONFIG_UTIMENSAT is defined and the call is
+ * implemented.  Otherwise the request is emulated with utimes(), which
+ * takes microseconds: UTIME_NOW is replaced by the current time and
+ * UTIME_OMIT by the whole-second value reported by stat().
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int qemu_utimens(const char *path, const struct timespec *times)
+{
+    struct timeval tv[2], tv_now;
+    struct stat st;
+    int i;
+#ifdef CONFIG_UTIMENSAT
+    int ret;
+
+    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    /* Fallback: use utimes() instead of utimensat() */
+
+    /* Easy special cases first */
+    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
+        return 0;
+    }
+    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
+        return utimes(path, NULL);
+    }
+
+    /* prepare for hard cases */
+    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
+        gettimeofday(&tv_now, NULL);
+    }
+    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
+        /* The omitted timestamp is copied from `st' below, so a failed
+         * stat() must not be ignored: `st' would be uninitialized. */
+        if (stat(path, &st) < 0) {
+            return -1;
+        }
+    }
+
+    for (i = 0; i < 2; i++) {
+        if (times[i].tv_nsec == UTIME_NOW) {
+            tv[i].tv_sec = tv_now.tv_sec;
+            tv[i].tv_usec = tv_now.tv_usec;
+        } else if (times[i].tv_nsec == UTIME_OMIT) {
+            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
+            tv[i].tv_usec = 0;
+        } else {
+            tv[i].tv_sec = times[i].tv_sec;
+            tv[i].tv_usec = times[i].tv_nsec / 1000;
+        }
+    }
+
+    return utimes(path, &tv[0]);
+}
+
+/*
+ * Build "<CONFIG_QEMU_LOCALSTATEDIR>/<relative_pathname>" in a string
+ * allocated by g_strdup_printf() and return it.
+ */
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+    char *path = g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
+                                 relative_pathname);
+
+    return path;
+}
diff --git a/contrib/qemu/util/qemu-error.c b/contrib/qemu/util/qemu-error.c
new file mode 100644
index 000000000..fec02c607
--- /dev/null
+++ b/contrib/qemu/util/qemu-error.c
@@ -0,0 +1,225 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include "monitor/monitor.h"
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO should return int, so callers can calculate width, but that
+ * requires surgery to monitor_vprintf().  Left for another day.
+ */
+void error_vprintf(const char *fmt, va_list ap)
+{
+    if (!cur_mon) {
+        vfprintf(stderr, fmt, ap);
+        return;
+    }
+    monitor_vprintf(cur_mon, fmt, ap);
+}
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * Variadic front end of error_vprintf(); the TODO noted there applies
+ * here as well.
+ */
+void error_printf(const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    error_vprintf(fmt, args);
+    va_end(args);
+}
+
+/* Like error_printf(), but prints nothing when monitor_cur_is_qmp() is true. */
+void error_printf_unless_qmp(const char *fmt, ...)
+{
+    va_list args;
+
+    if (monitor_cur_is_qmp()) {
+        return;
+    }
+
+    va_start(args, fmt);
+    error_vprintf(fmt, args);
+    va_end(args);
+}
+
+/* Initial entry of the location stack; its kind is LOC_NONE. */
+static Location std_loc = {
+    .kind = LOC_NONE
+};
+/* Top of the location stack, i.e. the current location. */
+static Location *cur_loc = &std_loc;
+
+/*
+ * Push location saved in LOC onto the location stack, return it.
+ * The top of that stack is the current location.
+ * LOC must not already be linked into the stack.
+ * Needs a matching loc_pop().
+ */
+Location *loc_push_restore(Location *loc)
+{
+    Location *below = cur_loc;
+
+    assert(!loc->prev);
+    loc->prev = below;
+    cur_loc = loc;
+    return cur_loc;
+}
+
+/*
+ * Initialize *LOC to "nowhere", push it onto the location stack.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ * Return LOC.
+ */
+Location *loc_push_none(Location *loc)
+{
+    loc->prev = NULL;
+    loc->kind = LOC_NONE;
+
+    return loc_push_restore(loc);
+}
+
+/*
+ * Pop the location stack.
+ * LOC must be the current location, i.e. the top of the stack, and must
+ * have an entry below it.  Returns LOC, unlinked from the stack.
+ */
+Location *loc_pop(Location *loc)
+{
+    Location *below;
+
+    assert(cur_loc == loc && loc->prev);
+    below = loc->prev;
+    loc->prev = NULL;
+    cur_loc = below;
+    return loc;
+}
+
+/*
+ * Save the current location in LOC, return LOC.
+ * The copy is not linked into the stack (its prev is cleared).
+ */
+Location *loc_save(Location *loc)
+{
+    const Location *top = cur_loc;
+
+    *loc = *top;
+    loc->prev = NULL;
+    return loc;
+}
+
+/*
+ * Change the current location to the one saved in LOC.
+ * The current entry keeps its position in the stack; only its contents
+ * are replaced.
+ */
+void loc_restore(Location *loc)
+{
+    Location *below;
+
+    assert(!loc->prev);
+    below = cur_loc->prev;
+    *cur_loc = *loc;
+    cur_loc->prev = below;
+}
+
+/*
+ * Change the current location to "nowhere in particular".
+ * Only the kind field of the current entry is updated.
+ */
+void loc_set_none(void)
+{
+    Location *top = cur_loc;
+
+    top->kind = LOC_NONE;
+}
+
+/*
+ * Change the current location to argument ARGV[IDX..IDX+CNT-1].
+ * The arguments are referenced, not copied.
+ */
+void loc_set_cmdline(char **argv, int idx, int cnt)
+{
+    Location *top = cur_loc;
+
+    top->kind = LOC_CMDLINE;
+    top->ptr = argv + idx;
+    top->num = cnt;
+}
+
+/*
+ * Change the current location to file FNAME, line LNO.
+ * FNAME may be NULL only if the current location already is a file
+ * location; the file name stored there is then kept.
+ */
+void loc_set_file(const char *fname, int lno)
+{
+    assert (fname || cur_loc->kind == LOC_FILE);
+
+    if (fname) {
+        cur_loc->ptr = fname;
+    }
+    cur_loc->num = lno;
+    cur_loc->kind = LOC_FILE;
+}
+
+/* Program name used by error_print_loc(); NULL until it has been set. */
+static const char *progname;
+
+/*
+ * Set the program name for error_print_loc().
+ * Only the part of ARGV0 after its last '/' is used.  The string is not
+ * copied, so ARGV0 has to stay valid.
+ */
+void error_set_progname(const char *argv0)
+{
+    const char *slash = strrchr(argv0, '/');
+
+    if (slash) {
+        progname = slash + 1;
+    } else {
+        progname = argv0;
+    }
+}
+
+/* Return the name stored by error_set_progname(). */
+const char *error_get_progname(void)
+{
+    return progname;
+}
+
+/*
+ * Print current location to current monitor if we have one, else to stderr.
+ * Without a monitor, the program name (if set) is printed first.
+ */
+void error_print_loc(void)
+{
+    const char *const *argp;
+    const char *sep = "";
+    int i;
+
+    if (progname && !cur_mon) {
+        fprintf(stderr, "%s:", progname);
+        sep = " ";
+    }
+
+    switch (cur_loc->kind) {
+    case LOC_FILE:
+        error_printf("%s:", (const char *)cur_loc->ptr);
+        if (cur_loc->num) {
+            error_printf("%d:", cur_loc->num);
+        }
+        error_printf(" ");
+        break;
+    case LOC_CMDLINE:
+        argp = cur_loc->ptr;
+        i = 0;
+        while (i < cur_loc->num) {
+            error_printf("%s%s", sep, argp[i]);
+            sep = " ";
+            i++;
+        }
+        error_printf(": ");
+        break;
+    default:
+        error_printf("%s", sep);
+        break;
+    }
+}
+
+bool enable_timestamp_msg;
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf().  The result should not contain
+ * newlines.
+ * Prepend the current location and append a newline; when
+ * enable_timestamp_msg is set, an ISO 8601 timestamp comes first.
+ * It's wrong to call this in a QMP monitor.  Use qerror_report() there.
+ */
+void error_report(const char *fmt, ...)
+{
+    va_list args;
+
+    if (enable_timestamp_msg) {
+        GTimeVal now;
+        gchar *stamp;
+
+        g_get_current_time(&now);
+        stamp = g_time_val_to_iso8601(&now);
+        error_printf("%s ", stamp);
+        g_free(stamp);
+    }
+
+    error_print_loc();
+
+    va_start(args, fmt);
+    error_vprintf(fmt, args);
+    va_end(args);
+
+    error_printf("\n");
+}
diff --git a/contrib/qemu/util/qemu-option.c b/contrib/qemu/util/qemu-option.c
new file mode 100644
index 000000000..e0ef426da
--- /dev/null
+++ b/contrib/qemu/util/qemu-option.c
@@ -0,0 +1,1126 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/types.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/option_int.h"
+
+/*
+ * Copy an option name out of the parameter string @p, which points at the
+ * first byte of the name.
+ *
+ * The name ends at @delim (usually ',' or '=') or at the end of the
+ * string. At most @buf_size - 1 bytes are stored in @buf; longer names
+ * are truncated. @buf is always zero terminated. A NULL @buf only skips
+ * over the name.
+ *
+ * Returns the position of the delimiter or zero byte that ends the name.
+ */
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim)
+{
+    int used = 0;
+
+    for (; *p != '\0' && *p != delim; p++) {
+        if (buf != NULL && used < buf_size - 1) {
+            buf[used++] = *p;
+        }
+    }
+    if (buf != NULL) {
+        buf[used] = '\0';
+    }
+
+    return p;
+}
+
+/*
+ * Copy an option value out of the parameter string @p, which points at
+ * the first byte of the value.
+ *
+ * Works like get_opt_name() with the delimiter fixed to a comma, which
+ * starts the next option. A doubled comma stands for one literal comma
+ * inside the value. A NULL @buf only skips over the value.
+ *
+ * Returns the position of the terminating comma or zero byte.
+ */
+const char *get_opt_value(char *buf, int buf_size, const char *p)
+{
+    int used = 0;
+
+    while (*p != '\0') {
+        if (p[0] == ',') {
+            if (p[1] != ',') {
+                /* single comma: end of this value */
+                break;
+            }
+            /* doubled comma: skip the first, copy the second */
+            p++;
+        }
+        if (buf != NULL && used < buf_size - 1) {
+            buf[used++] = *p;
+        }
+        p++;
+    }
+    if (buf != NULL) {
+        buf[used] = '\0';
+    }
+
+    return p;
+}
+
+/*
+ * Scan the "name=value,name=value" string at *@pstr for the option @tag.
+ *
+ * On a match the value is copied to @buf (at most @buf_size - 1 bytes),
+ * *@pstr is moved past the value and a following comma, and the length of
+ * the copied value is returned. Returns 0 without touching *@pstr when
+ * @tag is not found or the string is malformed.
+ */
+int get_next_param_value(char *buf, int buf_size,
+                         const char *tag, const char **pstr)
+{
+    const char *cursor = *pstr;
+    char key[128];
+
+    while (1) {
+        cursor = get_opt_name(key, sizeof(key), cursor, '=');
+        if (*cursor != '=') {
+            return 0;
+        }
+        cursor++;
+
+        if (strcmp(tag, key) == 0) {
+            const char *rest = get_opt_value(buf, buf_size, cursor);
+
+            if (*rest == ',') {
+                rest++;
+            }
+            *pstr = rest;
+            return strlen(buf);
+        }
+
+        /* not the wanted option: skip its value */
+        cursor = get_opt_value(NULL, 0, cursor);
+        if (*cursor != ',') {
+            return 0;
+        }
+        cursor++;
+    }
+}
+
+/*
+ * One-shot form of get_next_param_value(): look up @tag in @str and copy
+ * its value to @buf. The scan position lives in this function's own copy
+ * of @str, so the caller's pointer is not advanced.
+ */
+int get_param_value(char *buf, int buf_size,
+ const char *tag, const char *str)
+{
+ return get_next_param_value(buf, buf_size, tag, &str);
+}
+
+/*
+ * Look up the option called @name in @list. The list ends at the first
+ * entry whose name is NULL; a NULL @list is treated as empty.
+ *
+ * Returns the matching entry, or NULL if there is none.
+ */
+QEMUOptionParameter *get_option_parameter(QEMUOptionParameter *list,
+                                          const char *name)
+{
+    for (; list && list->name; list++) {
+        if (strcmp(list->name, name) == 0) {
+            return list;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Parse a boolean option value into *@ret.
+ * A missing value (NULL) and "on" give true, "off" gives false. Anything
+ * else sets an error in @errp and leaves *@ret alone.
+ */
+static void parse_option_bool(const char *name, const char *value, bool *ret,
+                              Error **errp)
+{
+    if (value == NULL) {
+        *ret = 1;
+        return;
+    }
+
+    if (strcmp(value, "on") == 0) {
+        *ret = 1;
+    } else if (strcmp(value, "off") == 0) {
+        *ret = 0;
+    } else {
+        error_set(errp,QERR_INVALID_PARAMETER_VALUE, name, "'on' or 'off'");
+    }
+}
+
+/*
+ * Parse @value as an unsigned number (base detected by strtoull(): decimal,
+ * 0x hexadecimal or 0 octal) into *@ret.
+ *
+ * A NULL value, an empty value or trailing characters after the number
+ * set an error in @errp and leave *@ret alone.
+ */
+static void parse_option_number(const char *name, const char *value,
+                                uint64_t *ret, Error **errp)
+{
+    char *postfix;
+    uint64_t number;
+
+    if (value == NULL) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+        return;
+    }
+
+    number = strtoull(value, &postfix, 0);
+    /*
+     * postfix == value means strtoull() converted nothing (empty string).
+     * Checking only *postfix would accept that case as the number 0.
+     */
+    if (postfix == value || *postfix != '\0') {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+        return;
+    }
+    *ret = number;
+}
+
+/*
+ * Parse @value as a size into *@ret.
+ *
+ * The number is read with strtod() and may be followed by one suffix:
+ * 'k'/'K', 'M', 'G' or 'T' multiply by the matching power of 1024, 'b' or
+ * no suffix mean bytes. The fractional part of the result is dropped.
+ *
+ * A NULL value, an unknown suffix, or a result that is negative, not a
+ * number or too large for uint64_t sets an error in @errp and leaves
+ * *@ret alone.
+ */
+static void parse_option_size(const char *name, const char *value,
+                              uint64_t *ret, Error **errp)
+{
+    char *postfix;
+    double sizef;
+
+    if (value == NULL) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+        return;
+    }
+
+    sizef = strtod(value, &postfix);
+    switch (*postfix) {
+    case 'T':
+        sizef *= 1024;
+        /* fall through */
+    case 'G':
+        sizef *= 1024;
+        /* fall through */
+    case 'M':
+        sizef *= 1024;
+        /* fall through */
+    case 'K':
+    case 'k':
+        sizef *= 1024;
+        /* fall through */
+    case 'b':
+    case '\0':
+        break;
+    default:
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+#if 0 /* conversion from qerror_report() to error_set() broke this: */
+        error_printf_unless_qmp("You may use k, M, G or T suffixes for "
+                "kilobytes, megabytes, gigabytes and terabytes.\n");
+#endif
+        return;
+    }
+
+    /*
+     * Converting a double that does not fit into uint64_t is undefined
+     * behaviour, so refuse negative values, NaN and anything >= 2^64
+     * before the cast. The comparison is written so that NaN fails it.
+     */
+    if (!(sizef >= 0 && sizef < 18446744073709551616.0)) {
+        error_set(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+        return;
+    }
+    *ret = (uint64_t) sizef;
+}
+
+/*
+ * Sets the value of a parameter in a given option list. The parsing of the
+ * value depends on the type of option:
+ *
+ * OPT_FLAG (uses value.n):
+ * If no value is given, the flag is set to 1.
+ * Otherwise the value must be "on" (set to 1) or "off" (set to 0)
+ *
+ * OPT_STRING (uses value.s):
+ * value is strdup()ed and assigned as option value
+ *
+ * OPT_NUMBER (uses value.n):
+ * The value is converted to an unsigned integer; no suffixes are allowed.
+ *
+ * OPT_SIZE (uses value.n):
+ * The value is converted to an integer. Suffixes for kilobytes etc. are
+ * allowed (powers of 1024).
+ *
+ * Problems are reported on stderr or through qerror_report_err().
+ *
+ * Returns 0 on success, -1 in error cases
+ */
+int set_option_parameter(QEMUOptionParameter *list, const char *name,
+                         const char *value)
+{
+    bool flag;
+    Error *local_err = NULL;
+
+    // Find a matching parameter
+    list = get_option_parameter(list, name);
+    if (list == NULL) {
+        fprintf(stderr, "Unknown option '%s'\n", name);
+        return -1;
+    }
+
+    // Process parameter
+    switch (list->type) {
+    case OPT_FLAG:
+        parse_option_bool(name, value, &flag, &local_err);
+        if (!error_is_set(&local_err)) {
+            list->value.n = flag;
+        }
+        break;
+
+    case OPT_STRING:
+        if (value != NULL) {
+            list->value.s = g_strdup(value);
+        } else {
+            fprintf(stderr, "Option '%s' needs a parameter\n", name);
+            return -1;
+        }
+        break;
+
+    case OPT_NUMBER:
+        /*
+         * OPT_NUMBER is a known type (set_option_parameter_int() and
+         * print_option_parameters() handle it), so parse it here instead
+         * of reporting it as a bug below.
+         */
+        parse_option_number(name, value, &list->value.n, &local_err);
+        break;
+
+    case OPT_SIZE:
+        parse_option_size(name, value, &list->value.n, &local_err);
+        break;
+
+    default:
+        fprintf(stderr, "Bug: Option '%s' has an unknown type\n", name);
+        return -1;
+    }
+
+    if (error_is_set(&local_err)) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Store the integer @value in the option called @name.
+ * Works for flag, number and size options; string options are refused.
+ * An unknown name is reported on stderr.
+ *
+ * Returns 0 on success, -1 in error cases
+ */
+int set_option_parameter_int(QEMUOptionParameter *list, const char *name,
+                             uint64_t value)
+{
+    QEMUOptionParameter *param = get_option_parameter(list, name);
+
+    if (param == NULL) {
+        fprintf(stderr, "Unknown option '%s'\n", name);
+        return -1;
+    }
+
+    if (param->type != OPT_FLAG &&
+        param->type != OPT_NUMBER &&
+        param->type != OPT_SIZE) {
+        return -1;
+    }
+
+    param->value.n = value;
+    return 0;
+}
+
+/*
+ * Frees an option list. The string values of its OPT_STRING entries are
+ * freed as well.
+ */
+void free_option_parameters(QEMUOptionParameter *list)
+{
+    QEMUOptionParameter *entry;
+
+    for (entry = list; entry && entry->name; entry++) {
+        if (entry->type == OPT_STRING) {
+            g_free(entry->value.s);
+        }
+    }
+
+    g_free(list);
+}
+
+/*
+ * Count the entries of @list in front of the terminating entry (the one
+ * with a NULL name). A NULL list counts as empty.
+ */
+static size_t count_option_parameters(QEMUOptionParameter *list)
+{
+    size_t count;
+
+    for (count = 0; list && list->name; list++) {
+        count++;
+    }
+
+    return count;
+}
+
+/*
+ * Append the entries of @list to the option list @dest.
+ *
+ * Entries whose name already exists in @dest are skipped. If @dest is
+ * NULL, a new copy of @list is created. Entries are copied by plain
+ * struct assignment.
+ *
+ * Returns a pointer to the first element of the (reallocated) @dest.
+ */
+QEMUOptionParameter *append_option_parameters(QEMUOptionParameter *dest,
+                                              QEMUOptionParameter *list)
+{
+    size_t dest_count = count_option_parameters(dest);
+    size_t capacity = dest_count + count_option_parameters(list);
+
+    /* one extra slot for the terminating entry */
+    dest = g_realloc(dest, (capacity + 1) * sizeof(QEMUOptionParameter));
+    dest[dest_count].name = NULL;
+
+    for (; list && list->name; list++) {
+        if (get_option_parameter(dest, list->name) != NULL) {
+            continue;
+        }
+        dest[dest_count] = *list;
+        dest_count++;
+        dest[dest_count].name = NULL;
+    }
+
+    return dest;
+}
+
+/*
+ * Parses a parameter string (param) into an option list (dest).
+ *
+ * list is the template option list. If dest is NULL, a new copy of list is
+ * created. If list is NULL, this function fails.
+ *
+ * A parameter string consists of one or more parameters, separated by commas.
+ * Each parameter consists of its name and possibly of a value. In the latter
+ * case, the value is delimited by an = character. To specify a value which
+ * contains commas, double each comma so it won't be recognized as the end of
+ * the parameter.
+ *
+ * Names and values longer than 255 bytes are truncated.
+ *
+ * For more details of the parsing see above.
+ *
+ * Returns a pointer to the first element of dest (or the newly allocated copy)
+ * or NULL in error cases
+ */
+QEMUOptionParameter *parse_option_parameters(const char *param,
+    QEMUOptionParameter *list, QEMUOptionParameter *dest)
+{
+    QEMUOptionParameter *allocated = NULL;
+    char name[256];
+    char value[256];
+    char *param_delim, *value_delim;
+    char next_delim;
+
+    if (list == NULL) {
+        return NULL;
+    }
+
+    if (dest == NULL) {
+        dest = allocated = append_option_parameters(NULL, list);
+    }
+
+    while (*param) {
+
+        // Find parameter name and value in the string
+        param_delim = strchr(param, ',');
+        value_delim = strchr(param, '=');
+
+        /*
+         * Test for a missing comma first: ordering a valid pointer against
+         * NULL with '<' is not defined by the C standard.
+         */
+        if (value_delim && (!param_delim || value_delim < param_delim)) {
+            next_delim = '=';
+        } else {
+            next_delim = ',';
+            value_delim = NULL;
+        }
+
+        param = get_opt_name(name, sizeof(name), param, next_delim);
+        if (value_delim) {
+            param = get_opt_value(value, sizeof(value), param + 1);
+        }
+        if (*param != '\0') {
+            param++;
+        }
+
+        // Set the parameter
+        if (set_option_parameter(dest, name, value_delim ? value : NULL)) {
+            goto fail;
+        }
+    }
+
+    return dest;
+
+fail:
+    // Only free the list if it was newly allocated
+    free_option_parameters(allocated);
+    return NULL;
+}
+
+/*
+ * Prints the options of a list to stdout as "name=value " pairs.
+ * String options without a value are left out; flags print as on/off.
+ */
+void print_option_parameters(QEMUOptionParameter *list)
+{
+    while (list && list->name) {
+        switch (list->type) {
+        case OPT_STRING:
+            if (list->value.s != NULL) {
+                printf("%s='%s' ", list->name, list->value.s);
+            }
+            break;
+        case OPT_FLAG:
+            printf("%s=%s ", list->name, list->value.n ? "on" : "off");
+            break;
+        case OPT_SIZE:
+        case OPT_NUMBER:
+            /*
+             * value.n is filled through uint64_t pointers elsewhere in
+             * this file, so print it unsigned; PRId64 shows values above
+             * INT64_MAX as negative.
+             */
+            printf("%s=%" PRIu64 " ", list->name, list->value.n);
+            break;
+        default:
+            printf("%s=(unknown type) ", list->name);
+            break;
+        }
+        list++;
+    }
+}
+
+/*
+ * Prints an overview of all available options to stdout: one line per
+ * option with its name and help text (or a placeholder if it has none).
+ */
+void print_option_help(QEMUOptionParameter *list)
+{
+    printf("Supported options:\n");
+
+    for (; list && list->name; list++) {
+        if (list->help) {
+            printf("%-16s %s\n", list->name, list->help);
+        } else {
+            printf("%-16s %s\n", list->name, "No description available");
+        }
+    }
+}
+
+/* ------------------------------------------------------------------ */
+
+/*
+ * Find the option called @name in @opts. The list is walked from the
+ * tail, so of several options with the same name the last one wins.
+ * Returns NULL if there is no such option.
+ */
+static QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name)
+{
+    QemuOpt *candidate;
+
+    QTAILQ_FOREACH_REVERSE(candidate, &opts->head, QemuOptHead, next) {
+        if (strcmp(candidate->name, name) == 0) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Return the string value of option @name in @opts, or NULL if the option
+ * is not set. The string stays owned by the option.
+ */
+const char *qemu_opt_get(QemuOpts *opts, const char *name)
+{
+    QemuOpt *found = qemu_opt_find(opts, name);
+
+    if (found == NULL) {
+        return NULL;
+    }
+    return found->str;
+}
+
+/*
+ * Return true if any option name in @opts satisfies is_help_option().
+ */
+bool qemu_opt_has_help_opt(QemuOpts *opts)
+{
+    QemuOpt *cur;
+    bool found = false;
+
+    QTAILQ_FOREACH_REVERSE(cur, &opts->head, QemuOptHead, next) {
+        if (is_help_option(cur->name)) {
+            found = true;
+            break;
+        }
+    }
+    return found;
+}
+
+/*
+ * Return the parsed boolean value of option @name, or @defval if the
+ * option is not set. The option must be described as QEMU_OPT_BOOL.
+ */
+bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval)
+{
+    QemuOpt *found = qemu_opt_find(opts, name);
+
+    if (found != NULL) {
+        assert(found->desc && found->desc->type == QEMU_OPT_BOOL);
+        return found->value.boolean;
+    }
+    return defval;
+}
+
+/*
+ * Return the parsed numeric value of option @name, or @defval if the
+ * option is not set. The option must be described as QEMU_OPT_NUMBER.
+ */
+uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval)
+{
+    QemuOpt *found = qemu_opt_find(opts, name);
+
+    if (found != NULL) {
+        assert(found->desc && found->desc->type == QEMU_OPT_NUMBER);
+        return found->value.uint;
+    }
+    return defval;
+}
+
+/*
+ * Return the parsed size value of option @name, or @defval if the option
+ * is not set. The option must be described as QEMU_OPT_SIZE.
+ */
+uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval)
+{
+    QemuOpt *found = qemu_opt_find(opts, name);
+
+    if (found != NULL) {
+        assert(found->desc && found->desc->type == QEMU_OPT_SIZE);
+        return found->value.uint;
+    }
+    return defval;
+}
+
+/*
+ * Convert the string of @opt into its typed value, as directed by the
+ * option's descriptor. Options without descriptor and string options are
+ * left alone. Parse errors are stored in @errp; an unknown descriptor
+ * type aborts.
+ */
+static void qemu_opt_parse(QemuOpt *opt, Error **errp)
+{
+    const QemuOptDesc *desc = opt->desc;
+
+    if (desc == NULL) {
+        return;
+    }
+
+    switch (desc->type) {
+    case QEMU_OPT_STRING:
+        /* the string is the value */
+        break;
+    case QEMU_OPT_BOOL:
+        parse_option_bool(opt->name, opt->str, &opt->value.boolean, errp);
+        break;
+    case QEMU_OPT_NUMBER:
+        parse_option_number(opt->name, opt->str, &opt->value.uint, errp);
+        break;
+    case QEMU_OPT_SIZE:
+        parse_option_size(opt->name, opt->str, &opt->value.uint, errp);
+        break;
+    default:
+        abort();
+    }
+}
+
+/*
+ * Unlink @opt from the QemuOpts that owns it and free it together with
+ * its name and string value.
+ */
+static void qemu_opt_del(QemuOpt *opt)
+{
+ QTAILQ_REMOVE(&opt->opts->head, opt, next);
+ /* the casts strip the const qualifier so the strings can be freed */
+ g_free((/* !const */ char*)opt->name);
+ g_free((/* !const */ char*)opt->str);
+ g_free(opt);
+}
+
+/*
+ * Return true if the option list of @opts has an empty descriptor table,
+ * i.e. its first descriptor already has a NULL name. opt_set() accepts
+ * names without a descriptor only in that case.
+ */
+static bool opts_accepts_any(const QemuOpts *opts)
+{
+ return opts->list->desc[0].name == NULL;
+}
+
+/*
+ * Look up @name in the descriptor table @desc, which ends at the first
+ * entry with a NULL name. Returns the descriptor, or NULL if not found.
+ */
+static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc,
+                                            const char *name)
+{
+    const QemuOptDesc *cur;
+
+    for (cur = desc; cur->name != NULL; cur++) {
+        if (strcmp(cur->name, name) == 0) {
+            return cur;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Create the option @name=@value and link it into @opts.
+ *
+ * @name must have a descriptor in the option list unless the list accepts
+ * any name (see opts_accepts_any()); otherwise QERR_INVALID_PARAMETER is
+ * set in @errp and nothing is added.
+ * With @prepend the option is inserted at the head of the list, else at
+ * the tail. Name and value are copied with g_strdup().
+ * If the value fails to parse, the error is passed on to @errp and the
+ * option is removed again.
+ */
+static void opt_set(QemuOpts *opts, const char *name, const char *value,
+ bool prepend, Error **errp)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc;
+ Error *local_err = NULL;
+
+ desc = find_desc_by_name(opts->list->desc, name);
+ if (!desc && !opts_accepts_any(opts)) {
+ error_set(errp, QERR_INVALID_PARAMETER, name);
+ return;
+ }
+
+ opt = g_malloc0(sizeof(*opt));
+ opt->name = g_strdup(name);
+ opt->opts = opts;
+ /* link first: the error path below uses qemu_opt_del(), which unlinks */
+ if (prepend) {
+ QTAILQ_INSERT_HEAD(&opts->head, opt, next);
+ } else {
+ QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+ }
+ opt->desc = desc;
+ opt->str = g_strdup(value);
+ qemu_opt_parse(opt, &local_err);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ qemu_opt_del(opt);
+ }
+}
+
+/*
+ * Append the option @name=@value to @opts.
+ * A failure is reported with qerror_report_err().
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int qemu_opt_set(QemuOpts *opts, const char *name, const char *value)
+{
+    Error *err = NULL;
+
+    opt_set(opts, name, value, false, &err);
+    if (!error_is_set(&err)) {
+        return 0;
+    }
+
+    qerror_report_err(err);
+    error_free(err);
+    return -1;
+}
+
+/*
+ * Like qemu_opt_set(), but a failure is stored in @errp instead of being
+ * reported.
+ */
+void qemu_opt_set_err(QemuOpts *opts, const char *name, const char *value,
+ Error **errp)
+{
+ opt_set(opts, name, value, false, errp);
+}
+
+/*
+ * Append a boolean option @name to @opts. Both the typed value and the
+ * string form ("on" or "off") are stored.
+ *
+ * Returns 0 on success. Returns -1 after qerror_report() if @name has no
+ * descriptor and the list does not accept arbitrary names.
+ */
+int qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val)
+{
+    const QemuOptDesc *found = find_desc_by_name(opts->list->desc, name);
+    QemuOpt *entry;
+
+    if (found == NULL && !opts_accepts_any(opts)) {
+        qerror_report(QERR_INVALID_PARAMETER, name);
+        return -1;
+    }
+
+    entry = g_malloc0(sizeof(*entry));
+    entry->desc = found;
+    entry->name = g_strdup(name);
+    entry->opts = opts;
+    entry->value.boolean = !!val;
+    entry->str = g_strdup(val ? "on" : "off");
+    QTAILQ_INSERT_TAIL(&opts->head, entry, next);
+
+    return 0;
+}
+
+/*
+ * Append a numeric option @name to @opts. Both the typed value and its
+ * decimal string form are stored.
+ *
+ * Returns 0 on success. Returns -1 after qerror_report() if @name has no
+ * descriptor and the list does not accept arbitrary names.
+ */
+int qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val)
+{
+    const QemuOptDesc *found = find_desc_by_name(opts->list->desc, name);
+    QemuOpt *entry;
+
+    if (found == NULL && !opts_accepts_any(opts)) {
+        qerror_report(QERR_INVALID_PARAMETER, name);
+        return -1;
+    }
+
+    entry = g_malloc0(sizeof(*entry));
+    entry->desc = found;
+    entry->name = g_strdup(name);
+    entry->opts = opts;
+    entry->value.uint = val;
+    entry->str = g_strdup_printf("%" PRId64, val);
+    QTAILQ_INSERT_TAIL(&opts->head, entry, next);
+
+    return 0;
+}
+
+/*
+ * Call @func(name, value, @opaque) for every option in @opts, in list
+ * order. With @abort_on_failure the walk stops at the first non-zero
+ * result.
+ *
+ * Returns the result of the last call, or 0 if there are no options.
+ */
+int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
+                     int abort_on_failure)
+{
+    QemuOpt *cur;
+    int result = 0;
+
+    QTAILQ_FOREACH(cur, &opts->head, next) {
+        result = func(cur->name, cur->str, opaque);
+        if (result != 0 && abort_on_failure) {
+            break;
+        }
+    }
+    return result;
+}
+
+/*
+ * Find the QemuOpts with identifier @id in @list. A NULL @id matches the
+ * first entry that has no identifier. Returns NULL if nothing matches.
+ */
+QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id)
+{
+    QemuOpts *cur;
+
+    QTAILQ_FOREACH(cur, &list->head, next) {
+        if (id == NULL) {
+            if (cur->id == NULL) {
+                return cur;
+            }
+        } else if (cur->id != NULL && strcmp(cur->id, id) == 0) {
+            return cur;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Check an identifier: it must start with a letter, and every further
+ * character must be a letter, a digit, '-', '.' or '_'.
+ *
+ * Returns 1 if @id is well-formed, else 0.
+ */
+static int id_wellformed(const char *id)
+{
+    const char *c;
+
+    if (!qemu_isalpha(id[0])) {
+        return 0;
+    }
+    for (c = id + 1; *c; c++) {
+        if (!qemu_isalnum(*c) && !strchr("-._", *c)) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/*
+ * Find or create the QemuOpts with identifier @id in @list.
+ *
+ * A non-NULL @id must pass id_wellformed(), else an error is set in @errp
+ * and NULL is returned. If an entry with that id already exists it is
+ * returned, except when @fail_if_exists is set and list->merge_lists is
+ * not: then QERR_DUPLICATE_ID is set and NULL is returned.
+ * With a NULL @id and list->merge_lists set, an existing entry without id
+ * is reused.
+ * In all other cases a new, empty QemuOpts is allocated, its loc field is
+ * filled in by loc_save(), and it is appended to @list.
+ */
+QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
+ int fail_if_exists, Error **errp)
+{
+ QemuOpts *opts = NULL;
+
+ if (id) {
+ if (!id_wellformed(id)) {
+ error_set(errp,QERR_INVALID_PARAMETER_VALUE, "id", "an identifier");
+#if 0 /* conversion from qerror_report() to error_set() broke this: */
+ error_printf_unless_qmp("Identifiers consist of letters, digits, '-', '.', '_', starting with a letter.\n");
+#endif
+ return NULL;
+ }
+ opts = qemu_opts_find(list, id);
+ if (opts != NULL) {
+ if (fail_if_exists && !list->merge_lists) {
+ error_set(errp, QERR_DUPLICATE_ID, id, list->name);
+ return NULL;
+ } else {
+ return opts;
+ }
+ }
+ } else if (list->merge_lists) {
+ opts = qemu_opts_find(list, NULL);
+ if (opts) {
+ return opts;
+ }
+ }
+ /* no reusable entry: create a fresh one */
+ opts = g_malloc0(sizeof(*opts));
+ opts->id = g_strdup(id);
+ opts->list = list;
+ loc_save(&opts->loc);
+ QTAILQ_INIT(&opts->head);
+ QTAILQ_INSERT_TAIL(&list->head, opts, next);
+ return opts;
+}
+
+/*
+ * Create (or reuse, see qemu_opts_create()) a QemuOpts without id in
+ * @list. Any error is handed to assert_no_error().
+ */
+QemuOpts *qemu_opts_create_nofail(QemuOptsList *list)
+{
+    Error *err = NULL;
+    QemuOpts *created = qemu_opts_create(list, NULL, 0, &err);
+
+    assert_no_error(err);
+    return created;
+}
+
+/*
+ * Delete every QemuOpts in @list.
+ */
+void qemu_opts_reset(QemuOptsList *list)
+{
+ QemuOpts *opts, *next_opts;
+
+ /* the _SAFE variant is needed because qemu_opts_del() unlinks and frees opts */
+ QTAILQ_FOREACH_SAFE(opts, &list->head, next, next_opts) {
+ qemu_opts_del(opts);
+ }
+}
+
+/*
+ * Pass the location stored in @opts (filled in by qemu_opts_create()) to
+ * loc_restore().
+ */
+void qemu_opts_loc_restore(QemuOpts *opts)
+{
+ loc_restore(&opts->loc);
+}
+
+/*
+ * Create the QemuOpts @id in @list and set its option @name to @value.
+ * An existing @id is an error unless the list merges (see
+ * qemu_opts_create()). Failures are reported.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int qemu_opts_set(QemuOptsList *list, const char *id,
+                  const char *name, const char *value)
+{
+    Error *err = NULL;
+    QemuOpts *opts = qemu_opts_create(list, id, 1, &err);
+
+    if (!error_is_set(&err)) {
+        return qemu_opt_set(opts, name, value);
+    }
+
+    qerror_report_err(err);
+    error_free(err);
+    return -1;
+}
+
+/*
+ * Return the identifier of @opts; NULL if it was created without one.
+ * The string stays owned by @opts.
+ */
+const char *qemu_opts_id(QemuOpts *opts)
+{
+ return opts->id;
+}
+
+/*
+ * Delete @opts: free all its options, unlink it from its list and free
+ * its id and the structure itself.
+ */
+void qemu_opts_del(QemuOpts *opts)
+{
+    QemuOpt *first;
+
+    /* qemu_opt_del() unlinks the entry, so the head changes every round */
+    while ((first = QTAILQ_FIRST(&opts->head)) != NULL) {
+        qemu_opt_del(first);
+    }
+
+    QTAILQ_REMOVE(&opts->list->head, opts, next);
+    g_free(opts->id);
+    g_free(opts);
+}
+
+/*
+ * Print @opts on one line to stderr: list name, id ("<noid>" if there is
+ * none) and every option as name="value". @dummy is not used.
+ * Always returns 0.
+ */
+int qemu_opts_print(QemuOpts *opts, void *dummy)
+{
+    const char *shown_id = opts->id ? opts->id : "<noid>";
+    QemuOpt *cur;
+
+    fprintf(stderr, "%s: %s:", opts->list->name, shown_id);
+    QTAILQ_FOREACH(cur, &opts->head, next) {
+        fprintf(stderr, " %s=\"%s\"", cur->name, cur->str);
+    }
+    fprintf(stderr, "\n");
+
+    return 0;
+}
+
+/*
+ * Parse the comma separated option string @params into @opts.
+ *
+ * Each element is either "name=value" or a bare token. A bare token in
+ * first position is the value of the option @firstname, if @firstname is
+ * not NULL. Any other bare token is a flag: "foo" means foo=on, and a
+ * token starting with "no" has that prefix removed and is set to off.
+ * The option "id" is parsed but not stored here.
+ * Options are added with opt_set(); @prepend is passed through to it.
+ *
+ * Returns 0 on success, -1 after reporting the error from opt_set().
+ */
+static int opts_do_parse(QemuOpts *opts, const char *params,
+ const char *firstname, bool prepend)
+{
+ char option[128], value[1024];
+ const char *p,*pe,*pc;
+ Error *local_err = NULL;
+
+ /* p rests on the ',' after each element; the p++ here steps over it */
+ for (p = params; *p != '\0'; p++) {
+ pe = strchr(p, '=');
+ pc = strchr(p, ',');
+ if (!pe || (pc && pc < pe)) {
+ /* found "foo,more" */
+ if (p == params && firstname) {
+ /* implicitly named first option */
+ pstrcpy(option, sizeof(option), firstname);
+ p = get_opt_value(value, sizeof(value), p);
+ } else {
+ /* option without value, probably a flag */
+ p = get_opt_name(option, sizeof(option), p, ',');
+ /* NOTE: any name starting with "no" is taken as a negated flag */
+ if (strncmp(option, "no", 2) == 0) {
+ memmove(option, option+2, strlen(option+2)+1);
+ pstrcpy(value, sizeof(value), "off");
+ } else {
+ pstrcpy(value, sizeof(value), "on");
+ }
+ }
+ } else {
+ /* found "foo=bar,more" */
+ p = get_opt_name(option, sizeof(option), p, '=');
+ if (*p != '=') {
+ break;
+ }
+ p++;
+ p = get_opt_value(value, sizeof(value), p);
+ }
+ if (strcmp(option, "id") != 0) {
+ /* store and parse */
+ opt_set(opts, option, value, prepend, &local_err);
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ return -1;
+ }
+ }
+ /* anything but a comma here ends the option string */
+ if (*p != ',') {
+ break;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Parse @params into the existing @opts, appending each option.
+ * @firstname, if not NULL, names a leading bare value.
+ * Returns 0 on success, -1 on failure (see opts_do_parse()).
+ */
+int qemu_opts_do_parse(QemuOpts *opts, const char *params, const char *firstname)
+{
+ return opts_do_parse(opts, params, firstname, false);
+}
+
+/*
+ * Create a QemuOpts in @list from the option string @params.
+ *
+ * The id is taken from an "id=" element at the start of @params or after
+ * a comma. With @permit_abbrev a leading bare value is assigned to
+ * list->implied_opt_name, which must then be set.
+ * With @defaults an existing QemuOpts of that id is not an error and the
+ * parsed options are prepended instead of appended.
+ *
+ * Returns the QemuOpts, or NULL after reporting an error.
+ */
+static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
+ int permit_abbrev, bool defaults)
+{
+ const char *firstname;
+ char value[1024], *id = NULL;
+ const char *p;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+
+ assert(!permit_abbrev || list->implied_opt_name);
+ firstname = permit_abbrev ? list->implied_opt_name : NULL;
+
+ /* pull out the id first; opts_do_parse() skips "id" later on */
+ if (strncmp(params, "id=", 3) == 0) {
+ get_opt_value(value, sizeof(value), params+3);
+ id = value;
+ } else if ((p = strstr(params, ",id=")) != NULL) {
+ get_opt_value(value, sizeof(value), p+4);
+ id = value;
+ }
+ opts = qemu_opts_create(list, id, !defaults, &local_err);
+ if (opts == NULL) {
+ if (error_is_set(&local_err)) {
+ qerror_report_err(local_err);
+ error_free(local_err);
+ }
+ return NULL;
+ }
+
+ /* NOTE(review): opts may be a pre-existing entry; it is deleted on failure too */
+ if (opts_do_parse(opts, params, firstname, defaults) != 0) {
+ qemu_opts_del(opts);
+ return NULL;
+ }
+
+ return opts;
+}
+
+/*
+ * Create a QemuOpts in @list from the option string @params (see
+ * opts_parse()). Returns NULL after reporting an error.
+ */
+QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
+ int permit_abbrev)
+{
+ return opts_parse(list, params, permit_abbrev, false);
+}
+
+/*
+ * Parse @params into @list as default values (opts_parse() in defaults
+ * mode). The parse must succeed; this is checked with assert().
+ */
+void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
+                            int permit_abbrev)
+{
+    /* keep the call outside assert() so it also runs when asserts are off */
+    QemuOpts *parsed = opts_parse(list, params, permit_abbrev, true);
+
+    assert(parsed);
+}
+
+/* State passed to qemu_opts_from_qdict_1() through its opaque argument. */
+typedef struct OptsFromQDictState {
+ QemuOpts *opts; /* options are added here */
+ Error **errp; /* first error is stored here */
+} OptsFromQDictState;
+
+/*
+ * Convert one QDict entry to a string and add it to state->opts with
+ * qemu_opt_set_err(). @opaque points to an OptsFromQDictState.
+ *
+ * The key "id" is skipped, and nothing is done once state->errp holds an
+ * error. QStrings are used as is, QInts and QFloats are formatted into a
+ * local buffer, QBools become "on" or "off". Other types are ignored.
+ */
+static void qemu_opts_from_qdict_1(const char *key, QObject *obj, void *opaque)
+{
+ OptsFromQDictState *state = opaque;
+ char buf[32];
+ const char *value;
+ int n;
+
+ if (!strcmp(key, "id") || error_is_set(state->errp)) {
+ return;
+ }
+
+ switch (qobject_type(obj)) {
+ case QTYPE_QSTRING:
+ value = qstring_get_str(qobject_to_qstring(obj));
+ break;
+ case QTYPE_QINT:
+ n = snprintf(buf, sizeof(buf), "%" PRId64,
+ qint_get_int(qobject_to_qint(obj)));
+ /* the formatted number must have fit into buf */
+ assert(n < sizeof(buf));
+ value = buf;
+ break;
+ case QTYPE_QFLOAT:
+ n = snprintf(buf, sizeof(buf), "%.17g",
+ qfloat_get_double(qobject_to_qfloat(obj)));
+ assert(n < sizeof(buf));
+ value = buf;
+ break;
+ case QTYPE_QBOOL:
+ pstrcpy(buf, sizeof(buf),
+ qbool_get_int(qobject_to_qbool(obj)) ? "on" : "off");
+ value = buf;
+ break;
+ default:
+ return;
+ }
+
+ qemu_opt_set_err(state->opts, key, value, state->errp);
+}
+
+/*
+ * Build a QemuOpts in @list from the entries of @qdict.
+ * The value of key "id", if it exists and is a QString, becomes the ID.
+ * QStrings, QInts, QFloats and QBools are copied; entries of any other
+ * type are silently ignored.
+ *
+ * Returns the new QemuOpts, or NULL with the error stored in @errp.
+ */
+QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
+                               Error **errp)
+{
+    Error *err = NULL;
+    OptsFromQDictState iter_state;
+    QemuOpts *created;
+
+    created = qemu_opts_create(list, qdict_get_try_str(qdict, "id"), 1, &err);
+    if (error_is_set(&err)) {
+        error_propagate(errp, err);
+        return NULL;
+    }
+
+    assert(created != NULL);
+
+    iter_state.opts = created;
+    iter_state.errp = &err;
+    qdict_iter(qdict, qemu_opts_from_qdict_1, &iter_state);
+    if (!error_is_set(&err)) {
+        return created;
+    }
+
+    error_propagate(errp, err);
+    qemu_opts_del(created);
+    return NULL;
+}
+
+/*
+ * Adds all QDict entries to the QemuOpts that can be added and removes them
+ * from the QDict. When this function returns, the QDict contains only those
+ * entries that couldn't be added to the QemuOpts.
+ *
+ * Only keys that have a descriptor in the option list of @opts are
+ * considered. The first failure is stored in @errp and ends the walk; the
+ * failing entry and all later ones stay in the QDict.
+ */
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp)
+{
+ const QDictEntry *entry, *next;
+
+ entry = qdict_first(qdict);
+
+ while (entry != NULL) {
+ Error *local_err = NULL;
+ OptsFromQDictState state = {
+ .errp = &local_err,
+ .opts = opts,
+ };
+
+ /* fetch the successor first: entry may be deleted below */
+ next = qdict_next(qdict, entry);
+
+ if (find_desc_by_name(opts->list->desc, entry->key)) {
+ qemu_opts_from_qdict_1(entry->key, entry->value, &state);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ return;
+ } else {
+ qdict_del(qdict, entry->key);
+ }
+ }
+
+ entry = next;
+ }
+}
+
+/*
+ * Copy @opts into a QDict.
+ * Every option is stored under its name as a QString; the id, if any, is
+ * stored under "id". If @qdict is NULL a new QDict is created.
+ * TODO We'll want to use types appropriate for opt->desc->type, but
+ * this is enough for now.
+ *
+ * Returns the QDict that was filled.
+ */
+QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict)
+{
+    QemuOpt *cur;
+
+    if (qdict == NULL) {
+        qdict = qdict_new();
+    }
+    if (opts->id != NULL) {
+        qdict_put(qdict, "id", qstring_from_str(opts->id));
+    }
+    QTAILQ_FOREACH(cur, &opts->head, next) {
+        qdict_put_obj(qdict, cur->name, QOBJECT(qstring_from_str(cur->str)));
+    }
+
+    return qdict;
+}
+
+/*
+ * Validate already parsed @opts against the descriptor table @desc, for
+ * lists that were declared without descriptors (this is asserted).
+ *
+ * Each option gets its descriptor assigned and its value parsed. The
+ * first unknown name or unparsable value is stored in @errp and ends the
+ * validation.
+ */
+void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
+{
+    QemuOpt *cur;
+    Error *err = NULL;
+
+    assert(opts_accepts_any(opts));
+
+    QTAILQ_FOREACH(cur, &opts->head, next) {
+        const QemuOptDesc *match = find_desc_by_name(desc, cur->name);
+
+        cur->desc = match;
+        if (match == NULL) {
+            error_set(errp, QERR_INVALID_PARAMETER, cur->name);
+            return;
+        }
+
+        qemu_opt_parse(cur, &err);
+        if (error_is_set(&err)) {
+            error_propagate(errp, err);
+            return;
+        }
+    }
+}
+
+/*
+ * Call @func(opts, @opaque) for every QemuOpts in @list.
+ *
+ * Before each call the location stored in that QemuOpts is restored; the
+ * calls are bracketed by loc_push_none() and loc_pop().
+ * With @abort_on_failure the walk stops once the accumulated result is
+ * non-zero.
+ *
+ * Returns the bitwise OR of all results, 0 if the list is empty.
+ */
+int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, void *opaque,
+ int abort_on_failure)
+{
+ Location loc;
+ QemuOpts *opts;
+ int rc = 0;
+
+ loc_push_none(&loc);
+ QTAILQ_FOREACH(opts, &list->head, next) {
+ loc_restore(&opts->loc);
+ rc |= func(opts, opaque);
+ if (abort_on_failure && rc != 0)
+ break;
+ }
+ loc_pop(&loc);
+ return rc;
+}
diff --git a/contrib/qemu/util/qemu-thread-posix.c b/contrib/qemu/util/qemu-thread-posix.c
new file mode 100644
index 000000000..4489abf1d
--- /dev/null
+++ b/contrib/qemu/util/qemu-thread-posix.c
@@ -0,0 +1,327 @@
+/*
+ * Wrappers around mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2009
+ *
+ * Author:
+ * Marcelo Tosatti <mtosatti@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include "qemu/thread.h"
+
+/*
+ * Print "qemu: <msg>: <strerror(err)>" to stderr and abort the process.
+ * @err is an errno value; callers in this file pass their function name
+ * as @msg.
+ */
+static void error_exit(int err, const char *msg)
+{
+ fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err));
+ abort();
+}
+
+/*
+ * Initialize @mutex as an error-checking pthread mutex.
+ * Aborts through error_exit() if pthread_mutex_init() fails.
+ */
+void qemu_mutex_init(QemuMutex *mutex)
+{
+    pthread_mutexattr_t attr;
+    int rc;
+
+    pthread_mutexattr_init(&attr);
+    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
+    rc = pthread_mutex_init(&mutex->lock, &attr);
+    pthread_mutexattr_destroy(&attr);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/* Destroy @mutex; aborts through error_exit() on failure. */
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+    int rc = pthread_mutex_destroy(&mutex->lock);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/* Lock @mutex; aborts through error_exit() on failure. */
+void qemu_mutex_lock(QemuMutex *mutex)
+{
+    int rc = pthread_mutex_lock(&mutex->lock);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/*
+ * Try to lock @mutex without blocking.
+ * Returns the result of pthread_mutex_trylock() unchanged: 0 if the lock
+ * was taken, an error number otherwise. Unlike the other wrappers in this
+ * file, this one never aborts.
+ */
+int qemu_mutex_trylock(QemuMutex *mutex)
+{
+ return pthread_mutex_trylock(&mutex->lock);
+}
+
+/* Unlock @mutex; aborts through error_exit() on failure. */
+void qemu_mutex_unlock(QemuMutex *mutex)
+{
+    int rc = pthread_mutex_unlock(&mutex->lock);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/*
+ * Initialize @cond with default attributes; aborts through error_exit()
+ * on failure.
+ */
+void qemu_cond_init(QemuCond *cond)
+{
+    int rc = pthread_cond_init(&cond->cond, NULL);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/* Destroy @cond; aborts through error_exit() on failure. */
+void qemu_cond_destroy(QemuCond *cond)
+{
+    int rc = pthread_cond_destroy(&cond->cond);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/* Signal @cond; aborts through error_exit() on failure. */
+void qemu_cond_signal(QemuCond *cond)
+{
+    int rc = pthread_cond_signal(&cond->cond);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/* Broadcast on @cond; aborts through error_exit() on failure. */
+void qemu_cond_broadcast(QemuCond *cond)
+{
+    int rc = pthread_cond_broadcast(&cond->cond);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/*
+ * Wait on @cond with @mutex, as pthread_cond_wait() does.
+ * Aborts through error_exit() on failure.
+ */
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
+{
+    int rc = pthread_cond_wait(&cond->cond, &mutex->lock);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+}
+
+/*
+ * Initialize @sem with the count @init.
+ *
+ * On Apple and NetBSD builds the semaphore is emulated with a mutex, a
+ * condition variable and a counter, and a negative @init is rejected with
+ * EINVAL. Elsewhere sem_init() is used.
+ * Every failure ends in error_exit().
+ */
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ rc = pthread_mutex_init(&sem->lock, NULL);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ rc = pthread_cond_init(&sem->cond, NULL);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ if (init < 0) {
+ error_exit(EINVAL, __func__);
+ }
+ sem->count = init;
+#else
+ /* sem_init() reports failure as -1 with the cause in errno */
+ rc = sem_init(&sem->sem, 0, init);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+/*
+ * Destroy @sem.
+ *
+ * On Apple and NetBSD builds this destroys the condition variable and the
+ * mutex of the emulated semaphore, elsewhere it calls sem_destroy().
+ * Every failure ends in error_exit().
+ */
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+    int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+    /*
+     * pthread functions return a positive error number on failure, never
+     * a negative value, so the test has to be "!= 0".
+     */
+    rc = pthread_cond_destroy(&sem->cond);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+    rc = pthread_mutex_destroy(&sem->lock);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+#else
+    /* sem_destroy() reports failure as -1 with the cause in errno */
+    rc = sem_destroy(&sem->sem);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+/*
+ * Increment @sem.
+ *
+ * In the emulation used on Apple and NetBSD builds, a count already at
+ * INT_MAX is an EINVAL error; the condition variable is signalled when
+ * the count was negative before the increment.
+ * Elsewhere sem_post() is used. Every failure ends in error_exit().
+ */
+void qemu_sem_post(QemuSemaphore *sem)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_lock(&sem->lock);
+ if (sem->count == INT_MAX) {
+ rc = EINVAL;
+ } else if (sem->count++ < 0) {
+ rc = pthread_cond_signal(&sem->cond);
+ } else {
+ rc = 0;
+ }
+ pthread_mutex_unlock(&sem->lock);
+ /* report the error only after the mutex has been released */
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+#else
+ rc = sem_post(&sem->sem);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+/*
+ * Fill @ts with the absolute time, as read by gettimeofday(), that lies
+ * @ms milliseconds from now. @ms may be negative; tv_nsec always ends up
+ * in the range 0..999999999.
+ */
+static void compute_abs_deadline(struct timespec *ts, int ms)
+{
+    struct timeval tv;
+
+    gettimeofday(&tv, NULL);
+    ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
+    ts->tv_sec = tv.tv_sec + ms / 1000;
+    if (ts->tv_nsec >= 1000000000) {
+        ts->tv_sec++;
+        ts->tv_nsec -= 1000000000;
+    } else if (ts->tv_nsec < 0) {
+        /*
+         * A negative @ms has a negative remainder; borrow one second so
+         * the timespec stays valid (an out-of-range tv_nsec makes the
+         * timed wait fail with EINVAL).
+         */
+        ts->tv_sec--;
+        ts->tv_nsec += 1000000000;
+    }
+}
+
+/*
+ * Decrement @sem, waiting at most @ms milliseconds for it to become
+ * available.
+ *
+ * In the sem_*() based build, @ms <= 0 does a single non-blocking
+ * attempt, and interrupted calls (EINTR) are retried.
+ *
+ * Returns 0 if the semaphore was taken, -1 if the wait timed out (or, for
+ * the non-blocking attempt, the semaphore was unavailable). Any other
+ * failure ends in error_exit().
+ */
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+    /*
+     * Start at 0: in the emulated path the wait loop does not run at all
+     * when the semaphore is available, and rc is read after the loop.
+     */
+    int rc = 0;
+    struct timespec ts;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+    compute_abs_deadline(&ts, ms);
+    pthread_mutex_lock(&sem->lock);
+    --sem->count;
+    while (sem->count < 0) {
+        rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
+        if (rc == ETIMEDOUT) {
+            /* give back the slot claimed above */
+            ++sem->count;
+            break;
+        }
+        if (rc != 0) {
+            error_exit(rc, __func__);
+        }
+    }
+    pthread_mutex_unlock(&sem->lock);
+    return (rc == ETIMEDOUT ? -1 : 0);
+#else
+    if (ms <= 0) {
+        /* This is cheaper than sem_timedwait. */
+        do {
+            rc = sem_trywait(&sem->sem);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == EAGAIN) {
+            return -1;
+        }
+    } else {
+        compute_abs_deadline(&ts, ms);
+        do {
+            rc = sem_timedwait(&sem->sem, &ts);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == ETIMEDOUT) {
+            return -1;
+        }
+    }
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+    return 0;
+#endif
+}
+
+/*
+ * Decrement @sem, blocking until it is available.
+ *
+ * In the emulation used on Apple and NetBSD builds the count is
+ * decremented first and the caller then waits on the condition variable
+ * while the count is negative. Elsewhere sem_wait() is used; calls
+ * interrupted by a signal (EINTR) are retried and other failures end in
+ * error_exit().
+ */
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_lock(&sem->lock);
+ --sem->count;
+ while (sem->count < 0) {
+ pthread_cond_wait(&sem->cond, &sem->lock);
+ }
+ pthread_mutex_unlock(&sem->lock);
+#else
+ int rc;
+
+ do {
+ rc = sem_wait(&sem->sem);
+ } while (rc == -1 && errno == EINTR);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+/*
+ * Start a thread running @start_routine(@arg) and store its handle in
+ * @thread. With @mode == QEMU_THREAD_DETACHED the thread is created
+ * detached.
+ *
+ * All signals are blocked in the calling thread around pthread_create()
+ * and its previous mask is restored afterwards.
+ * Failures of the attribute calls or of pthread_create() end in
+ * error_exit().
+ */
+void qemu_thread_create(QemuThread *thread,
+ void *(*start_routine)(void*),
+ void *arg, int mode)
+{
+ sigset_t set, oldset;
+ int err;
+ pthread_attr_t attr;
+
+ err = pthread_attr_init(&attr);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ if (mode == QEMU_THREAD_DETACHED) {
+ err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ }
+
+ /* Leave signal handling to the iothread. */
+ sigfillset(&set);
+ pthread_sigmask(SIG_SETMASK, &set, &oldset);
+ err = pthread_create(&thread->thread, &attr, start_routine, arg);
+ if (err)
+ error_exit(err, __func__);
+
+ /* back to the caller's original signal mask */
+ pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+
+ pthread_attr_destroy(&attr);
+}
+
+/* Store the handle of the calling thread in @thread. */
+void qemu_thread_get_self(QemuThread *thread)
+{
+ thread->thread = pthread_self();
+}
+
+/* Return true if @thread refers to the calling thread. */
+bool qemu_thread_is_self(QemuThread *thread)
+{
+ return pthread_equal(pthread_self(), thread->thread);
+}
+
+/*
+ * Terminate the calling thread with pthread_exit(); @retval is what
+ * qemu_thread_join() returns for it.
+ */
+void qemu_thread_exit(void *retval)
+{
+ pthread_exit(retval);
+}
+
+/*
+ * Wait for @thread to terminate and return the value it exited with.
+ * Aborts through error_exit() if pthread_join() fails.
+ */
+void *qemu_thread_join(QemuThread *thread)
+{
+    void *retval;
+    int rc = pthread_join(thread->thread, &retval);
+
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+    return retval;
+}
diff --git a/contrib/qemu/util/unicode.c b/contrib/qemu/util/unicode.c
new file mode 100644
index 000000000..d1c865885
--- /dev/null
+++ b/contrib/qemu/util/unicode.c
@@ -0,0 +1,100 @@
+/*
+ * Dealing with Unicode
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+/**
+ * mod_utf8_codepoint:
+ * @s: string encoded in modified UTF-8
+ * @n: maximum number of bytes to read from @s, if less than 6
+ * @end: set to end of sequence on return
+ *
+ * Convert the modified UTF-8 sequence at the start of @s. Modified
+ * UTF-8 is exactly like UTF-8, except U+0000 is encoded as
+ * "\xC0\x80".
+ *
+ * If @n is zero or @s points to a zero byte, the sequence is invalid,
+ * and @end is set to @s.
+ *
+ * If @s points to an impossible byte (0xFE or 0xFF) or a continuation
+ * byte, the sequence is invalid, and @end is set to @s + 1
+ *
+ * Else, the first byte determines how many continuation bytes are
+ * expected. If there are fewer, the sequence is invalid, and @end is
+ * set to @s + 1 + actual number of continuation bytes. Else, the
+ * sequence is well-formed, and @end is set to @s + 1 + expected
+ * number of continuation bytes.
+ *
+ * A well-formed sequence is valid unless it encodes a codepoint
+ * outside the Unicode range U+0000..U+10FFFF, one of Unicode's 66
+ * noncharacters, a surrogate codepoint, or is overlong. Except the
+ * overlong sequence "\xC0\x80" is valid.
+ *
+ * Conversion succeeds if and only if the sequence is valid.
+ *
+ * Returns: the Unicode codepoint on success, -1 on failure.
+ */
+int mod_utf8_codepoint(const char *s, size_t n, char **end)
+{
+    /* min_cp[k] is the smallest codepoint needing a (k + 2)-byte sequence */
+    static int min_cp[5] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+    const unsigned char *cur = (const unsigned char *)s;
+    unsigned lead, bit, seq_len, idx, cont;
+    int cp;
+
+    if (n == 0 || *s == 0) {
+        /* nothing to decode: @end stays on @s */
+        *end = (char *)s;
+        return -1;
+    }
+
+    lead = *cur++;
+    if (lead < 0x80) {
+        /* a single byte stands for itself */
+        *end = (char *)cur;
+        return (int)lead;
+    }
+    if (lead >= 0xFE || (lead & 0x40) == 0) {
+        /* 0xFE/0xFF are impossible, 10xxxxxx is a stray continuation byte */
+        *end = (char *)cur;
+        return -1;
+    }
+
+    /* the run of leading one bits gives the total length of the sequence */
+    seq_len = 0;
+    bit = 0x80;
+    while (lead & bit) {
+        seq_len++;
+        bit >>= 1;
+    }
+    assert(seq_len > 1 && seq_len < 7);
+
+    /* payload of the lead byte: everything below the first zero bit */
+    cp = lead & (bit - 1);
+    for (idx = 1; idx < seq_len; idx++) {
+        /* bytes beyond @n are treated like a missing continuation byte */
+        cont = idx < n ? *cur : 0;
+        if ((cont & 0xC0) != 0x80) {
+            *end = (char *)cur;
+            return -1;
+        }
+        cur++;
+        cp <<= 6;
+        cp |= cont & 0x3F;
+    }
+
+    /* well-formed from here on; what remains is validity of the value */
+    *end = (char *)cur;
+
+    if (cp > 0x10FFFF) {
+        return -1;              /* beyond Unicode range */
+    }
+    if ((cp >= 0xFDD0 && cp <= 0xFDEF) || (cp & 0xFFFE) == 0xFFFE) {
+        return -1;              /* noncharacter */
+    }
+    if (cp >= 0xD800 && cp <= 0xDFFF) {
+        return -1;              /* surrogate code point */
+    }
+    if (cp < min_cp[seq_len - 2] && !(cp == 0 && seq_len == 2)) {
+        return -1;              /* overlong, not \xC0\x80 */
+    }
+    return cp;
+}
diff --git a/contrib/stdlib/gf_mkostemp.c b/contrib/stdlib/gf_mkostemp.c
new file mode 100644
index 000000000..931249a45
--- /dev/null
+++ b/contrib/stdlib/gf_mkostemp.c
@@ -0,0 +1,107 @@
+/* Borrowed from glibc-2.16/sysdeps/posix/tempname.c */
+
+/* Copyright (C) 1991-2001, 2006, 2007, 2009 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <time.h>
+#include <inttypes.h>
+
+static const char letters[] =
+"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+
+/* Generate a temporary file name based on TMPL.  TMPL must match the
+   rules for mk[s]temp (i.e. end in "XXXXXX", possibly with a suffix).
+*/
+
+#if !defined(TMP_MAX)
+#define TMP_MAX 238328
+#endif
+
+/*
+ * gf_mkostemp - create and open a uniquely named temporary file.
+ *
+ * @tmpl:      writable path template; the six characters immediately
+ *             before the trailing @suffixlen characters must be
+ *             "XXXXXX" and are overwritten in place with the generated
+ *             name.
+ * @suffixlen: number of characters following the "XXXXXX" run.
+ * @flags:     extra open(2) flags; the access-mode bits are ignored and
+ *             O_RDWR | O_CREAT | O_EXCL is always added.
+ *
+ * Returns the open file descriptor on success.  Returns -1 on failure
+ * with errno set: EINVAL when @tmpl is NULL, @suffixlen is negative or
+ * the template does not have the required shape, EEXIST when every
+ * attempted name already existed, otherwise the errno left by open(2).
+ *
+ * The function keeps a static counter between calls and does no
+ * locking of its own.
+ */
+int
+gf_mkostemp (char *tmpl, int suffixlen, int flags)
+{
+        size_t           len;
+        size_t           i;
+        char            *XXXXXX;
+        static uint64_t  value;
+        uint64_t         random_time_bits;
+        unsigned int     count;
+        int              fd = -1;
+        const size_t     nletters = sizeof (letters) - 1;
+
+        /* A lower bound on the number of temporary files to attempt to
+           generate.  The maximum total number of temporary file names that
+           can exist for a given template is 62**6.  It should never be
+           necessary to try all these combinations.  Instead if a reasonable
+           number of names is tried (we define reasonable as 62**3) fail to
+           give the system administrator the chance to remove the problems.  */
+
+        unsigned int attempts = TMP_MAX;        /* TMP_MAX == 62^3 */
+
+        /* Reject arguments that would make the template check below read
+           outside the string.  */
+        if (tmpl == NULL || suffixlen < 0) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        /* Compare in size_t so that a huge suffixlen cannot overflow.  */
+        len = strlen (tmpl);
+        if (len < 6 || (size_t) suffixlen > len - 6 ||
+            memcmp (&tmpl[len - 6 - suffixlen], "XXXXXX", 6)) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        /* This is where the Xs start.  */
+        XXXXXX = &tmpl[len - 6 - suffixlen];
+
+        /* Get some more or less random data.  */
+# if HAVE_GETTIMEOFDAY
+        {
+                struct timeval tv;
+
+                gettimeofday (&tv, NULL);
+                random_time_bits = ((uint64_t) tv.tv_usec << 16) ^ tv.tv_sec;
+        }
+# else
+        random_time_bits = time (NULL);
+# endif
+
+        value += random_time_bits ^ getpid ();
+
+        for (count = 0; count < attempts; value += 7777, ++count) {
+                uint64_t v = value;
+
+                /* Fill in the random bits, one base-62 digit per X.  */
+                for (i = 0; i < 6; i++) {
+                        XXXXXX[i] = letters[v % nletters];
+                        v /= nletters;
+                }
+
+                fd = open (tmpl, (flags & ~O_ACCMODE)
+                           | O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
+
+                if (fd >= 0)
+                        return fd;
+                else if (errno != EEXIST)
+                        return -1;
+        }
+
+        /* We got out of the loop because we ran out of combinations to try.  */
+        errno = EEXIST;
+        return -1;
+}
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 5eea66182..1103b607d 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,8 +1,5 @@
-EXTRA_DIST = glusterfs.vol.sample glusterfsd.vol.sample glusterfs.8 mount.glusterfs.8\
- glusterd.vol gluster.8 glusterd.8 glusterfsd.8
-
-voldir = $(sysconfdir)/glusterfs
-vol_DATA = glusterd.vol
+EXTRA_DIST = glusterfs.8 mount.glusterfs.8 gluster.8 \
+ glusterd.8 glusterfsd.8
man8_MANS = glusterfs.8 mount.glusterfs.8 gluster.8 glusterd.8 glusterfsd.8
diff --git a/doc/admin-guide/en-US/Administration_Guide.ent b/doc/admin-guide/en-US/Administration_Guide.ent
deleted file mode 100644
index 3381b2bfe..000000000
--- a/doc/admin-guide/en-US/Administration_Guide.ent
+++ /dev/null
@@ -1,4 +0,0 @@
-<!ENTITY PRODUCT "Documentation">
-<!ENTITY BOOKID "Administration_Guide">
-<!ENTITY YEAR "2012">
-<!ENTITY HOLDER "Red Hat Inc">
diff --git a/doc/admin-guide/en-US/Administration_Guide.xml b/doc/admin-guide/en-US/Administration_Guide.xml
deleted file mode 100644
index 483855b1a..000000000
--- a/doc/admin-guide/en-US/Administration_Guide.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<book>
- <xi:include href="Book_Info.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Preface.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="gfs_introduction.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_start_stop_daemon.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_console.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_storage_pools.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_setting_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_settingup_clients.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_managing_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_geo-replication.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_directory_Quota.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_monitoring_workload.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_ACLs.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_UFO.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_Hadoop.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_troubleshooting.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_commandref.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="glossary.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Revision_History.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
-</book>
-
diff --git a/doc/admin-guide/en-US/Author_Group.xml b/doc/admin-guide/en-US/Author_Group.xml
deleted file mode 100644
index f3fa31740..000000000
--- a/doc/admin-guide/en-US/Author_Group.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE authorgroup PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<authorgroup>
- <author>
- <firstname>Divya</firstname>
- <surname>Muntimadugu</surname>
- <affiliation>
- <orgname>Red Hat</orgname>
- <orgdiv>Engineering Content Services</orgdiv>
- </affiliation>
- <email>divya@redhat.com</email>
- </author>
-</authorgroup>
-
diff --git a/doc/admin-guide/en-US/Book_Info.xml b/doc/admin-guide/en-US/Book_Info.xml
deleted file mode 100644
index 6be6a7816..000000000
--- a/doc/admin-guide/en-US/Book_Info.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE bookinfo PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<bookinfo id="book-Administration_Guide-Administration_Guide">
- <title>Administration Guide</title>
- <subtitle>Using Gluster File System <remark> Beta 3</remark> </subtitle>
- <productname>Gluster File System</productname>
- <productnumber>3.3</productnumber>
- <edition>1</edition>
- <pubsnumber>1</pubsnumber>
- <abstract>
- <para>
- This guide describes Gluster File System (GlusterFS) and provides information on how to configure, operate, and manage GlusterFS.
- </para>
- </abstract>
- <corpauthor>
- <inlinemediaobject>
- <imageobject>
- <imagedata fileref="Common_Content/images/title_logo.svg" format="SVG" />
- </imageobject>
- </inlinemediaobject>
- </corpauthor>
- <xi:include href="Common_Content/Legal_Notice.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Author_Group.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
-</bookinfo>
-
diff --git a/doc/admin-guide/en-US/Chapter.xml b/doc/admin-guide/en-US/Chapter.xml
deleted file mode 100644
index 4a1cef872..000000000
--- a/doc/admin-guide/en-US/Chapter.xml
+++ /dev/null
@@ -1,33 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Test_Chapter">
- <title>Test Chapter</title>
- <para>
- This is a test paragraph
- </para>
- <section id="sect-Administration_Guide-Test_Chapter-Test_Section_1">
- <title>Test Section 1</title>
- <para>
- This is a test paragraph in a section
- </para>
- </section>
-
- <section id="sect-Administration_Guide-Test_Chapter-Test_Section_2">
- <title>Test Section 2</title>
- <para>
- This is a test paragraph in Section 2
- <orderedlist>
- <listitem>
- <para>
- listitem text
- </para>
- </listitem>
- </orderedlist>
- </para>
- </section>
-
-</chapter>
-
diff --git a/doc/admin-guide/en-US/Preface.xml b/doc/admin-guide/en-US/Preface.xml
deleted file mode 100644
index 320311906..000000000
--- a/doc/admin-guide/en-US/Preface.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. -->
-<!DOCTYPE preface PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<preface id="pref-Administration_Guide-Preface">
- <title>Preface</title>
- <para>This guide describes how to configure, operate, and manage Gluster File System (GlusterFS).</para>
- <section>
- <title>Audience</title>
- <para>This guide is intended for Systems Administrators interested in configuring and managing GlusterFS.</para>
- <para>This guide assumes that you are familiar with the Linux operating system, concepts of File System, GlusterFS concepts, and GlusterFS Installation</para>
- </section>
- <section>
- <title>License</title>
- <para>The License information is available at <ulink url="http://www.redhat.com/licenses/rhel_rha_eula.html"/>.</para>
- </section>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Conventions.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Feedback.xml">
- <xi:fallback xmlns:xi="http://www.w3.org/2001/XInclude"> <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Feedback.xml"/>
- </xi:fallback>
- </xi:include>
-</preface>
diff --git a/doc/admin-guide/en-US/Revision_History.xml b/doc/admin-guide/en-US/Revision_History.xml
deleted file mode 100644
index 09320821f..000000000
--- a/doc/admin-guide/en-US/Revision_History.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<appendix id="appe-Administration_Guide-Revision_History">
- <title>Revision History</title>
- <simpara>
- <revhistory>
- <revision>
- <revnumber>1-0</revnumber>
- <date>Thu Apr 5 2012</date>
- <author>
- <firstname>Divya</firstname>
- <surname>Muntimadugu</surname>
- <email>divya@redhat.com</email>
- </author>
- <revdescription>
- <simplelist>
- <member>Draft </member>
- </simplelist>
- </revdescription>
- </revision>
- </revhistory>
- </simpara>
-</appendix>
-
diff --git a/doc/admin-guide/en-US/admin_ACLs.xml b/doc/admin-guide/en-US/admin_ACLs.xml
deleted file mode 100644
index 156e52c17..000000000
--- a/doc/admin-guide/en-US/admin_ACLs.xml
+++ /dev/null
@@ -1,206 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-ACLs">
- <title>POSIX Access Control Lists </title>
- <para>POSIX Access Control Lists (ACLs) allows you to assign different permissions for different users or
-groups even though they do not correspond to the original owner or the owning group.
- </para>
- <para>For example: User john creates a file but does not want to allow anyone to do anything with this
-file, except another user, antony (even though there are other users that belong to the group john).
-</para>
- <para>This means, in addition to the file owner, the file group, and others, additional users and groups can
-be granted or denied access by using POSIX ACLs.
-</para>
- <section id="sect-Administration_Guide-ACLs-Activating_ACLs">
- <title>Activating POSIX ACLs Support </title>
- <para>To use POSIX ACLs for a file or directory, the partition of the file or directory must be mounted with
-POSIX ACLs support.
-</para>
- <section id="sect-Administration_Guide-ACLs-Activating_ACLs-Server">
- <title>Activating POSIX ACLs Support on Sever </title>
- <para>To mount the backend export directories for POSIX ACLs support, use the following command:
-</para>
- <para><command># mount -o acl <replaceable>device-name</replaceable><replaceable>partition</replaceable></command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -o acl /dev/sda1 /export1 </command></para>
- <para>Alternatively, if the partition is listed in the /etc/fstab file, add the following entry for the partition
-to include the POSIX ACLs option:
-</para>
- <para><command>LABEL=/work /export1 ext3 rw, acl 14 </command></para>
- </section>
- <section>
- <title>Activating POSIX ACLs Support on Client </title>
- <para>To mount the glusterfs volumes for POSIX ACLs support, use the following command:
-</para>
- <para><command># mount –t glusterfs -o acl <replaceable>severname:volume-id</replaceable><replaceable>mount point</replaceable></command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -t glusterfs -o acl 198.192.198.234:glustervolume /mnt/gluster</command>
-</para>
- </section>
- </section>
- <section>
- <title>Setting POSIX ACLs </title>
- <para>You can set two types of POSIX ACLs, that is, access ACLs and default ACLs. You can use
-access ACLs to grant permission for a specific file or directory. You can use default ACLs only
-on a directory but if a file inside that directory does not have an ACLs, it inherits the permissions of
-the default ACLs of the directory.
-</para>
- <para>You can set ACLs for per user, per group, for users not in the user group for the file, and via the
-effective right mask.
-</para>
- <section>
- <title>Setting Access ACLs </title>
- <para>You can apply access ACLs to grant permission for both files and directories.
-</para>
- <para><emphasis role="bold">To set or modify Access ACLs</emphasis>
-</para>
- <para>You can set or modify access ACLs use the following command:
-</para>
- <para><command># setfacl –m <replaceable>entry type</replaceable> file </command></para>
- <para>The ACL entry types are the POSIX ACLs representations of owner, group, and other.
-</para>
- <para>Permissions must be a combination of the characters <command>r</command> (read), <command>w</command> (write), and <command>x</command> (execute). You must
-specify the ACL entry in the following format and can specify multiple entry types separated by
-commas.
-</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <thead>
- <row>
- <entry>ACL Entry</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>u:uid:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for a user. You can specify user name or UID </entry>
- </row>
- <row>
- <entry>g:gid:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for a group. You can specify group name or GID. </entry>
- </row>
- <row>
- <entry>m:&lt;permission&gt; </entry>
- <entry>Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries. </entry>
- </row>
- <row>
- <entry>o:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for users other than the ones in the group for the file. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <para>If a file or directory already has an POSIX ACLs, and the setfacl command is used, the additional
-permissions are added to the existing POSIX ACLs or the existing rule is modified.
-</para>
- <para>For example, to give read and write permissions to user antony:
-</para>
- <para><command># setfacl -m u:antony:rw /mnt/gluster/data/testfile </command></para>
- </section>
- <section>
- <title>Setting Default ACLs </title>
- <para>You can apply default ACLs only to directories. They determine the permissions of a file system
-objects that inherits from its parent directory when it is created.
-</para>
- <para>To set default ACLs
-</para>
- <para>You can set default ACLs for files and directories using the following command:
-</para>
- <para><command># setfacl –m –-set <replaceable>entry type directory</replaceable></command>
-</para>
- <para>For example, to set the default ACLs for the /data directory to read for users not in the user group:
-</para>
- <para><command># setfacl –m --set o::r /mnt/gluster/data </command></para>
- <para><note>
- <para>An access ACLs set for an individual file can override the default ACLs permissions.
-</para>
- </note></para>
- <para><emphasis role="bold">Effects of a Default ACLs </emphasis></para>
- <para>The following are the ways in which the permissions of a directory&apos;s default ACLs are passed to the
-files and subdirectories in it:
-</para>
- <itemizedlist>
- <listitem>
- <para>A subdirectory inherits the default ACLs of the parent directory both as its default ACLs and as an
-access ACLs.
-</para>
- </listitem>
- <listitem>
- <para>A file inherits the default ACLs as its access ACLs.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Retrieving POSIX ACLs </title>
- <para>You can view the existing POSIX ACLs for a file or directory.
-</para>
- <para><emphasis role="bold">To view existing POSIX ACLs </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>View the existing access ACLs of a file using the following command:
-</para>
- <para><command># getfacl <replaceable>path/filename</replaceable></command>
-</para>
- <para>For example, to view the existing POSIX ACLs for sample.jpg
-</para>
- <programlisting># getfacl /mnt/gluster/data/test/sample.jpg
-# owner: antony
-# group: antony
-user::rw-
-group::rw-
-other::r--</programlisting>
- </listitem>
- <listitem>
- <para>View the default ACLs of a directory using the following command:
-</para>
- <para><command># getfacl <replaceable>directory name</replaceable></command></para>
- <para>For example, to view the existing ACLs for /data/doc
-</para>
- <programlisting># getfacl /mnt/gluster/data/doc
-# owner: antony
-# group: antony
-user::rw-
-user:john:r--
-group::r--
-mask::r--
-other::r--
-default:user::rwx
-default:user:antony:rwx
-default:group::r-x
-default:mask::rwx
-default:other::r-x</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Removing POSIX ACLs </title>
- <para>To remove all the permissions for a user, groups, or others, use the following command:
-</para>
- <para><command># setfacl -x <replaceable>ACL entry type file</replaceable></command></para>
- <para>For example, to remove all permissions from the user antony:
-</para>
- <para><command># setfacl -x u:antony /mnt/gluster/data/test-file</command></para>
- </section>
- <section>
- <title>Samba and ACLs </title>
- <para>If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs are enabled by default.
-Samba has been compiled with the <command>--with-acl-support</command> option, so no special flags are required
-when accessing or mounting a Samba share.
-</para>
- </section>
- <section>
- <title>NFS and ACLs </title>
- <para>Currently we do not support ACLs configuration through NFS, i.e. setfacl and getfacl commands do
-not work. However, ACLs permissions set using Gluster Native Client applies on NFS mounts.
-</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_Hadoop.xml b/doc/admin-guide/en-US/admin_Hadoop.xml
deleted file mode 100644
index 08bac8961..000000000
--- a/doc/admin-guide/en-US/admin_Hadoop.xml
+++ /dev/null
@@ -1,244 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Hadoop">
- <title>Managing Hadoop Compatible Storage </title>
- <para>GlusterFS provides compatibility for Apache Hadoop and it uses the standard file system
-APIs available in Hadoop to provide a new storage option for Hadoop deployments. Existing
-MapReduce based applications can use GlusterFS seamlessly. This new functionality opens up data
-within Hadoop deployments to any file-based or object-based application.
-
- </para>
- <section id="sect-Administration_Guide-Hadoop-Introduction-Architecture_Overview">
- <title>Architecture Overview </title>
- <para>The following diagram illustrates Hadoop integration with GlusterFS:
-<mediaobject>
- <imageobject>
- <imagedata fileref="images/Hadoop_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </para>
- </section>
- <section id="sect-Administration_Guide-Hadoop-Introduction-Advantages">
- <title>Advantages </title>
- <para>
-The following are the advantages of Hadoop Compatible Storage with GlusterFS:
-
-
- </para>
- <itemizedlist>
- <listitem>
- <para>Provides simultaneous file-based and object-based access within Hadoop.
-</para>
- </listitem>
- <listitem>
- <para>Eliminates the centralized metadata server.
-</para>
- </listitem>
- <listitem>
- <para>Provides compatibility with MapReduce applications and rewrite is not required.
-</para>
- </listitem>
- <listitem>
- <para>Provides a fault tolerant file system.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Preparing to Install Hadoop Compatible Storage</title>
- <para>This section provides information on pre-requisites and list of dependencies that will be installed
-during installation of Hadoop compatible storage.
-
-</para>
- <section id="sect-Administration_Guide-Hadoop-Preparation">
- <title>Pre-requisites </title>
- <para>The following are the pre-requisites to install Hadoop Compatible
-Storage :
-
- </para>
- <itemizedlist>
- <listitem>
- <para>Hadoop 0.20.2 is installed, configured, and is running on all the machines in the cluster.
-</para>
- </listitem>
- <listitem>
- <para>Java Runtime Environment
-</para>
- </listitem>
- <listitem>
- <para>Maven (mandatory only if you are building the plugin from the source)
-</para>
- </listitem>
- <listitem>
- <para>JDK (mandatory only if you are building the plugin from the source)
-</para>
- </listitem>
- <listitem>
- <para>getfattr
-- command line utility</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Installing, and Configuring Hadoop Compatible Storage</title>
- <para>This section describes how to install and configure Hadoop Compatible Storage in your storage
-environment and verify that it is functioning correctly.
-
-</para>
- <orderedlist>
- <para>To install and configure Hadoop compatible storage:</para>
- <listitem>
- <para>Download <filename>glusterfs-hadoop-0.20.2-0.1.x86_64.rpm</filename> file to each server on your cluster. You can download the file from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm"/>.
-
-</para>
- </listitem>
- <listitem>
- <para>To install Hadoop Compatible Storage on all servers in your cluster, run the following command:
-</para>
- <para><command># rpm –ivh --nodeps glusterfs-hadoop-0.20.2-0.1.x86_64.rpm</command>
-</para>
- <para>The following files will be extracted:
- </para>
- <itemizedlist>
- <listitem>
- <para>/usr/local/lib/glusterfs-<replaceable>Hadoop-version-gluster_plugin_version</replaceable>.jar </para>
- </listitem>
- <listitem>
- <para> /usr/local/lib/conf/core-site.xml</para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>(Optional) To install Hadoop Compatible Storage in a different location, run the following
-command:
-</para>
- <para><command># rpm –ivh --nodeps –prefix /usr/local/glusterfs/hadoop glusterfs-hadoop- 0.20.2-0.1.x86_64.rpm</command>
-</para>
- </listitem>
- <listitem>
- <para>Edit the <filename>conf/core-site.xml</filename> file. The following is the sample <filename>conf/core-site.xml</filename> file:
-</para>
- <para><programlisting>&lt;configuration&gt;
- &lt;property&gt;
- &lt;name&gt;fs.glusterfs.impl&lt;/name&gt;
- &lt;value&gt;org.apache.hadoop.fs.glusterfs.Gluster FileSystem&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.default.name&lt;/name&gt;
- &lt;value&gt;glusterfs://fedora1:9000&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.volname&lt;/name&gt;
- &lt;value&gt;hadoopvol&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.mount&lt;/name&gt;
- &lt;value&gt;/mnt/glusterfs&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.server&lt;/name&gt;
- &lt;value&gt;fedora2&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;quick.slave.io&lt;/name&gt;
- &lt;value&gt;Off&lt;/value&gt;
-&lt;/property&gt;
-&lt;/configuration&gt;
-</programlisting></para>
- <para>The following are the configurable fields:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="3">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <colspec colnum="3" colname="c2" colsep="0"/>
- <thead>
- <row>
- <entry>Property Name </entry>
- <entry>Default Value </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>fs.default.name </entry>
- <entry>glusterfs://fedora1:9000</entry>
- <entry>Any hostname in the cluster as the server and any port number. </entry>
- </row>
- <row>
- <entry>fs.glusterfs.volname </entry>
- <entry>hadoopvol </entry>
- <entry>GlusterFS volume to mount. </entry>
- </row>
- <row>
- <entry>fs.glusterfs.mount </entry>
- <entry>/mnt/glusterfs</entry>
- <entry>The directory used to fuse mount the volume.</entry>
- </row>
- <row>
- <entry>fs.glusterfs.server </entry>
- <entry>fedora2</entry>
- <entry>Any hostname or IP address on the cluster except the client/master. </entry>
- </row>
- <row>
- <entry>quick.slave.io </entry>
- <entry>Off </entry>
- <entry>Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster. <note>
- <para>This option is not tested widely</para>
- </note></entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- </listitem>
- <listitem>
- <para>Create a soft link in Hadoop’s library and configuration directory for the downloaded files (in
-Step 3) using the following commands:
-</para>
- <para><command># ln -s <replaceable>&lt;target location&gt; &lt;source location</replaceable>&gt;</command>
-</para>
- <para>For example,
-</para>
- <para><command># ln –s /usr/local/lib/glusterfs-0.20.2-0.1.jar <replaceable>$HADOOP_HOME</replaceable>/lib/glusterfs-0.20.2-0.1.jar</command>
-</para>
- <para><command># ln –s /usr/local/lib/conf/core-site.xml <replaceable>$HADOOP_HOME</replaceable>/conf/core-site.xml </command></para>
- </listitem>
- <listitem>
- <para> (Optional) You can run the following command on Hadoop master to build the plugin and deploy
-it along with core-site.xml file, instead of repeating the above steps:
-</para>
- <para><command># build-deploy-jar.py -d <replaceable>$HADOOP_HOME</replaceable> -c </command></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Starting and Stopping the Hadoop MapReduce Daemon</title>
- <para>To start and stop MapReduce daemon</para>
- <itemizedlist>
- <listitem>
- <para>To start MapReduce daemon manually, enter the following command:
-</para>
- <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/start-mapred.sh</command>
-</para>
- </listitem>
- <listitem>
- <para>To stop MapReduce daemon manually, enter the following command:
-</para>
- <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/stop-mapred.sh </command></para>
- </listitem>
- </itemizedlist>
- <para><note>
- <para>You must start Hadoop MapReduce daemon on all servers.
-</para>
- </note></para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_UFO.xml b/doc/admin-guide/en-US/admin_UFO.xml
deleted file mode 100644
index 03be14dc9..000000000
--- a/doc/admin-guide/en-US/admin_UFO.xml
+++ /dev/null
@@ -1,1588 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-UFO">
- <title>Managing Unified File and Object Storage</title>
- <para>Unified File and Object Storage (UFO) unifies NAS and object storage technology. It
-provides a system for data storage that enables users to access the same data, both as an object and as a
-file, thus simplifying management and controlling storage costs.
-
-</para>
- <para>Unified File and Object Storage is built upon OpenStack&apos;s Object Storage, Swift. OpenStack Object Storage allows users to store and retrieve files and content as objects through a simple Web Service (REST: Representational State Transfer) interface, while GlusterFS allows users to store and retrieve files using native FUSE and NFS mounts. UFO uses GlusterFS as a backend file system for OpenStack Swift. It also leverages OpenStack Swift&apos;s web interface for storing and retrieving files over the web, combined with GlusterFS features such as scalability, high availability, replication, and elastic volume management for data management at the disk level.</para>
- <para>Unified File and Object Storage technology enables enterprises to adopt and deploy
-cloud storage solutions. It allows users to access and modify data as objects from a
-REST interface along with the ability to access and modify files from NAS interfaces including NFS
-and CIFS. In addition to decreasing cost and making it faster and easier to access object data,
-it also delivers massive scalability, high availability and replication of object storage.
-Infrastructure as a Service (IaaS) providers can utilize GlusterFS Unified File and Object Storage technology to enable their own cloud
-storage service. Enterprises can use this technology to accelerate the process of preparing file-based
-applications for the cloud and simplify new application development for cloud computing
-environments.
-
-</para>
- <para>OpenStack Object Storage is a scalable object storage system and it is not a traditional file system. You will not be able to mount this system like traditional SAN or NAS
-volumes and perform POSIX compliant operations. </para>
- <para><figure>
- <title>Unified File and Object Storage Architecture</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/UFO_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </figure></para>
- <section>
- <title>Components of Object Storage</title>
- <para>The major components of Object Storage are:
- </para>
- <para><emphasis role="bold">Proxy Server</emphasis>
-
-</para>
- <para>All REST requests to the UFO are routed through the Proxy Server.
-
-
-</para>
- <para><emphasis role="bold">Objects and Containers </emphasis></para>
- <para>An object is the basic storage entity and any optional metadata that represents the data
-you store. When you upload data, the data is stored as-is (with no compression or encryption).
-
-</para>
- <para>A container is a storage compartment for your data and provides a way for you to organize
-your data. Containers can be visualized as directories in a Linux system. Data must be stored in a container and hence objects are created within a container.
-
-</para>
- <para>It implements objects as files and directories under the container. The object name is a &apos;/&apos; separated path and UFO maps it to directories until the last name in the path, which is marked as a file. With this approach, objects can be accessed as files and directories from native GlusterFS (FUSE) or NFS mounts by providing the &apos;/&apos; separated path.</para>
- <para><emphasis role="bold">Accounts and Account Servers</emphasis></para>
- <para>The OpenStack Object Storage system is designed to be used by many different storage
-consumers. Each user is associated with one or more accounts and must identify themselves using an authentication system. While authenticating, users must provide the name of the account for which the authentication is requested.
-
-</para>
- <para>UFO implements accounts as GlusterFS volumes. So, when a user is granted read/write permission on an account, it means that that user has access to all the data available on that GlusterFS volume.
-
-
-
-
-</para>
- <para><emphasis role="bold">Authentication and Access Permissions</emphasis>
-
-</para>
- <para>You must authenticate against an authentication service to receive OpenStack Object
-Storage connection parameters and an authentication token. The token must be passed
-in for all subsequent container or object operations. One authentication service that you
-can use as a middleware example is called <literal>tempauth</literal>.</para>
- <para>By default, each user has their own storage account and has full access to that
-account. Users must authenticate with their credentials as described above, but once
-authenticated they can manage containers and objects within that account. If a user wants to access the content from another account, they must have an API access key or a session token provided by their authentication system.</para>
- </section>
- <section>
- <title>Advantages of using GlusterFS Unified File and Object Storage</title>
- <para>The following are the advantages of using GlusterFS UFO:</para>
- <itemizedlist>
- <listitem>
- <para>No limit on upload and download file sizes, as compared to OpenStack Swift, which limits the object size to 5GB.</para>
- </listitem>
- <listitem>
- <para>A unified view of data across NAS and Object Storage technologies.</para>
- </listitem>
- <listitem>
- <para>Using GlusterFS&apos;s UFO has other advantages like the following: </para>
- <para><itemizedlist>
- <listitem>
- <para>High availability</para>
- </listitem>
- <listitem>
- <para>Scalability</para>
- </listitem>
- <listitem>
- <para>Replication</para>
- </listitem>
- <listitem>
- <para>Elastic Volume management</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Preparing to Deploy Unified File and Object Storage</title>
- <para>This section provides information on pre-requisites and list of dependencies that will be installed
-during the installation of Unified File and Object Storage.
-</para>
- <section>
- <title>Pre-requisites </title>
- <para>GlusterFS&apos;s Unified File and Object Storage needs <literal>user_xattr</literal> support from the underlying disk file system.
-Use the following command to enable <literal>user_xattr</literal> for GlusterFS bricks backend:
-</para>
- <para><command># mount -o remount,user_xattr <replaceable>device name</replaceable></command></para>
- <para>For example,
-</para>
- <para><command># mount -o remount,user_xattr /dev/hda1 </command>
-</para>
- </section>
- <section>
- <title>Dependencies </title>
- <para>The following packages are installed on GlusterFS when you install Unified File and Object
-Storage:
-
-</para>
- <itemizedlist>
- <listitem>
- <para>curl
-
-
-
-
-
-
-
-
-
-
-
-
-
-</para>
- </listitem>
- <listitem>
- <para>memcached</para>
- </listitem>
- <listitem>
- <para>openssl</para>
- </listitem>
- <listitem>
- <para>xfsprogs</para>
- </listitem>
- <listitem>
- <para>python2.6</para>
- </listitem>
- <listitem>
- <para>pyxattr</para>
- </listitem>
- <listitem>
- <para>python-configobj
-</para>
- </listitem>
- <listitem>
- <para>python-setuptools
-
-</para>
- </listitem>
- <listitem>
- <para>python-simplejson
-
-</para>
- </listitem>
- <listitem>
- <para>python-webob
-
-</para>
- </listitem>
- <listitem>
- <para>python-eventlet
-
-</para>
- </listitem>
- <listitem>
- <para>python-greenlet
-
-</para>
- </listitem>
- <listitem>
- <para>python-pastedeploy
-
-</para>
- </listitem>
- <listitem>
- <para>python-netifaces
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Installing and Configuring Unified File and Object Storage</title>
- <para>This section provides instructions on how to install and configure Unified File and Object Storage in your storage
-environment.</para>
- <section id="chap-ation_Guide-Dir_Quota-Enable">
- <title>Installing Unified File and Object Storage</title>
- <para>To install Unified File and Object Storage:</para>
- <orderedlist>
- <listitem>
- <para>Download <filename>rhel_install.sh</filename> install script from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/"/> .
-</para>
- </listitem>
- <listitem>
- <para>Run
- <filename>rhel_install.sh</filename> script using the following command:
-</para>
- <para><command># sh rhel_install.sh</command></para>
- </listitem>
- <listitem>
- <para>Download <filename>swift-1.4.5-1.noarch.rpm</filename> and <filename>swift-plugin-1.0.-1.el6.noarch.rpm</filename> files from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/"/>.</para>
- </listitem>
- <listitem>
- <para>Install <filename>swift-1.4.5-1.noarch.rpm</filename> and <filename>swift-plugin-1.0.-1.el6.noarch.rpm</filename> using the following commands:</para>
- <para><command># rpm -ivh swift-1.4.5-1.noarch.rpm</command></para>
- <para><command># rpm -ivh swift-plugin-1.0.-1.el6.noarch.rpm</command></para>
- <para><note>
- <para>You must repeat the above steps on all the machines on which you want to install Unified File and Object Storage. If you install the Unified File and Object Storage on multiple servers, you can use a load balancer like pound, nginx, and so on to distribute the request across the machines.</para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Adding Users</title>
- <para>The authentication system allows the administrator to grant different levels of access to different users based on the requirement. The following are the types of user permissions:
- </para>
- <itemizedlist>
- <listitem>
- <para>admin user
- </para>
- </listitem>
- <listitem>
- <para>normal user</para>
- </listitem>
- </itemizedlist>
- <para>An admin user has read and write permissions on the account. By default, a normal user has no read or write permissions. A normal user can only authenticate itself to get an Auth-Token. Read or write permissions are provided through ACLs by the admin users.</para>
- <para>Add a new user by adding the following entry in <filename>/etc/swift/proxy-server.conf</filename> file:</para>
- <para><command>user_&lt;account-name&gt;_&lt;user-name&gt; = &lt;password&gt; [.admin]</command></para>
- <para>For example, </para>
- <para><command>user_test_tester = testing .admin</command>
-</para>
- <note>
- <para>During installation, the installation script adds a few sample users to the <filename>proxy-server.conf</filename> file. It is highly recommended that you remove all the default sample user entries from the configuration file.
-</para>
- </note>
- <para>For more information on setting ACLs, see <xref linkend="chap-Administration_Guide-Working_UFO-Setting_ACLs"/>.</para>
- </section>
- <section>
- <title>Configuring Proxy Server</title>
- <para>The Proxy Server is responsible for connecting to the rest of the OpenStack Object Storage architecture. For each request, it looks up the location of the account, container, or object in the ring and routes the request accordingly. The public API is also exposed through the proxy server. When objects are streamed to or from an object server, they are streamed directly through the proxy server to or from the user – the proxy server does not spool them.
-</para>
- <para>The configurable options pertaining to proxy server are stored in <filename>/etc/swift/proxy-server.conf</filename>. The following is the sample <filename>proxy-server.conf</filename> file:</para>
- <para><programlisting>[app:proxy-server]
-use = egg:swift#proxy
-allow_account_management=true
-account_autocreate=true
-
-[filter:tempauth]
-use = egg:swift#tempauth
-user_admin_admin = admin .admin .reseller_admin
-user_test_tester = testing .admin
-user_test2_tester2 = testing2 .admin
-user_test_tester3=testing3
-
-[filter:healthcheck]
-use = egg:swift#healthcheck
-
-[filter:cache]
-use = egg:swift#memcache</programlisting></para>
- <para>By default, GlusterFS&apos;s Unified File and Object Storage is configured to support HTTP protocol and uses temporary authentication to authenticate the HTTP requests.</para>
- </section>
- <section>
- <title>Configuring Authentication System</title>
- <para>Proxy server must be configured to authenticate using <literal>tempauth</literal>. </para>
- </section>
- <section>
- <title>Configuring Proxy Server for HTTPS</title>
- <para>By default, the proxy server handles only HTTP requests. To configure the proxy server to process HTTPS requests, perform the following steps:</para>
- <orderedlist>
- <listitem>
- <para>Create self-signed cert for SSL using the following commands:</para>
- <para><programlisting>cd /etc/swift
-openssl req -new -x509 -nodes -out cert.crt -keyout cert.key</programlisting></para>
- </listitem>
- <listitem>
- <para>Add the following lines to <filename>/etc/swift/proxy-server.conf </filename>under <replaceable>[DEFAULT]</replaceable></para>
- <para><programlisting>bind_port = 443
- cert_file = /etc/swift/cert.crt
- key_file = /etc/swift/cert.key</programlisting></para>
- </listitem>
- <listitem>
- <para>Restart the servers using the following commands:</para>
- <para><programlisting>swift-init main stop
-swift-init main start</programlisting></para>
- </listitem>
- </orderedlist>
- <para>The following are the configurable options:
-</para>
- <table frame="all">
- <title>proxy-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>80 </entry>
- <entry>Port for server to bind </entry>
- </row>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1</entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>swift user</entry>
- </row>
- <row>
- <entry>cert_file </entry>
- <entry/>
- <entry>Path to the ssl .crt </entry>
- </row>
- <row>
- <entry>key_file </entry>
- <entry/>
- <entry>Path to the ssl .key </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>proxy-server.conf Server Options in the [proxy-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the proxy server. For most cases, this should be <literal>egg:swift#proxy</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>proxy-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Log level </entry>
- </row>
- <row>
- <entry>log_headers </entry>
- <entry>True </entry>
- <entry>If True, log headers in each request </entry>
- </row>
- <row>
- <entry>recheck_account_existence </entry>
- <entry>60 </entry>
- <entry>Cache timeout in seconds to send memcached for account existence </entry>
- </row>
- <row>
- <entry>recheck_container_existence </entry>
- <entry>60 </entry>
- <entry>Cache timeout in seconds to send memcached for container existence </entry>
- </row>
- <row>
- <entry>object_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Chunk size to read from object servers </entry>
- </row>
- <row>
- <entry>client_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Chunk size to read from clients </entry>
- </row>
- <row>
- <entry>memcache_servers </entry>
- <entry>127.0.0.1:11211 </entry>
- <entry>Comma separated list of memcached servers ip:port </entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>10 </entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>client_timeout </entry>
- <entry>60 </entry>
- <entry>Timeout to read one chunk from a client </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5 </entry>
- <entry>Connection timeout to external services </entry>
- </row>
- <row>
- <entry>error_suppression_interval </entry>
- <entry>60 </entry>
- <entry>Time in seconds that must elapse since the last error for a node to be considered no longer error limited </entry>
- </row>
- <row>
- <entry>error_suppression_limit </entry>
- <entry>10 </entry>
- <entry>Error count to consider a node error limited </entry>
- </row>
- <row>
- <entry>allow_account_management </entry>
- <entry>false </entry>
- <entry>Whether account <literal>PUT</literal>s and <literal>DELETE</literal>s are even callable </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Object Server</title>
- <para>The Object Server is a very simple blob storage server that can store, retrieve, and delete objects stored on local devices. Objects are stored as binary files on the file system with metadata stored in the file’s extended attributes (xattrs). This requires that the underlying file system choice for object servers support xattrs on files.
-
-</para>
- <para>The configurable options pertaining to the Object Server are stored in the file <filename>/etc/swift/object-server/1.conf</filename>. The following is the sample <filename>object-server/1.conf</filename> file:</para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6010
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster object-server
-
-[app:object-server]
-use = egg:swift#object
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[object-replicator]
-vm_test_mode = yes
-
-[object-updater]
-[object-auditor]</programlisting></para>
- <para>The following are the configurable options:
-</para>
- <table frame="all">
- <title>object-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>Mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not to check if the devices are mounted, to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6000 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>object-server.conf Server Options in the [object-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the object server. For most cases, this should be <literal>egg:swift#object</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>object-server </entry>
- <entry>log name used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- <row>
- <entry>log_requests </entry>
- <entry>True </entry>
- <entry>Whether or not to log each request </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>swift user</entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>3</entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5</entry>
- <entry>Connection timeout to external services </entry>
- </row>
- <row>
- <entry>network_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Size of chunks to read or write over the network </entry>
- </row>
- <row>
- <entry>disk_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Size of chunks to read or write to disk </entry>
- </row>
- <row>
- <entry>max_upload_time </entry>
- <entry>86400 </entry>
- <entry>Maximum time allowed to upload an object </entry>
- </row>
- <row>
- <entry>slow </entry>
- <entry>0</entry>
- <entry>If &gt; 0, Minimum time in seconds for a <literal>PUT</literal> or <literal>DELETE</literal> request to complete </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Container Server</title>
- <para>The Container Server’s primary job is to handle listings of objects. The listing is done by querying the GlusterFS mount point with path. This query returns a list of all files and directories present under that container.
-</para>
- <para>The configurable options pertaining to container server are stored in <filename>/etc/swift/container-server/1.conf</filename> file. The following is the sample <filename>container-server/1.conf</filename> file:</para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6011
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster container-server
-
-[app:container-server]
-use = egg:swift#container
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[container-replicator]
-[container-updater]
-[container-auditor]</programlisting></para>
- <para>The following are the configurable options:</para>
- <table frame="all">
- <title>container-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>Mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not to check if the devices are mounted, to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6001 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>Swift user</entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>container-server.conf Server Options in the [container-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>container-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>3 </entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5 </entry>
- <entry>Connection timeout to external services </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Account Server</title>
- <para>The Account Server is very similar to the Container Server, except that it is responsible for listing of containers rather than objects. In UFO, each gluster volume is an account.
-</para>
- <para>The configurable options pertaining to account server are stored in <filename>/etc/swift/account-server/1.conf</filename> file. The following is the sample <filename>account-server/1.conf</filename> file: </para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6012
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster account-server
-
-[app:account-server]
-use = egg:swift#account
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[account-replicator]
-vm_test_mode = yes
-
-[account-auditor]
-[account-reaper]</programlisting></para>
- <para>The following are the configurable options:</para>
- <table frame="all">
- <title>account-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not to check if the devices are mounted, to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6002 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>Swift user</entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>account-server.conf Server Options in the [account-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the account server. For most cases, this should be <literal>egg:swift#account</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>account-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Starting and Stopping Server</title>
- <para>You must start the server manually when system reboots and whenever you update/modify the configuration files.</para>
- <itemizedlist>
- <listitem>
- <para>To start the server, enter the following command:</para>
- <para><command># swift-init main start</command></para>
- </listitem>
- <listitem>
- <para>To stop the server, enter the following command:</para>
- <para><command># swift-init main stop</command></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Working with Unified File and Object Storage</title>
- <para>This section describes the REST API for administering and managing Object Storage. All requests will
-be directed to the host and URL described in the <filename>X-Storage-Url</filename> HTTP header obtained during
-successful authentication.
-</para>
- <section>
- <title>Configuring Authenticated Access </title>
- <para>Authentication is the process of proving identity to the system. To use the REST interface, you must
-obtain an authorization token using GET method and supply it with v1.0 as the path.
-</para>
- <para>Each REST request against the Object Storage system requires the addition of a specific authorization
-token HTTP x-header, defined as X-Auth-Token. The storage URL and authentication token are
-returned in the headers of the response.
-</para>
- <itemizedlist>
- <listitem>
- <para>To authenticate, run the following command:
-</para>
- <programlisting>GET /auth/v1.0 HTTP/1.1
-Host: &lt;auth URL&gt;
-X-Auth-User: &lt;account name&gt;:&lt;user name&gt;
-X-Auth-Key: &lt;user-Password&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>GET /auth/v1.0 HTTP/1.1
-Host: auth.example.com
-X-Auth-User: test:tester
-X-Auth-Key: testing
-
-HTTP/1.1 200 OK
-X-Storage-Url: https://example.storage.com:443/v1/AUTH_test
-X-Storage-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
-X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
-Content-Length: 0
-Date: Wed, 10 Jul 2011 06:11:51 GMT</programlisting>
- <para>To authenticate access using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -H &apos;X-Storage-User: test:tester&apos; -H &apos;X-Storage-Pass:testing&apos; -k
-https://auth.example.com:443/auth/v1.0</programlisting>
- <para>The X-Storage-Url has to be parsed and used in the connection and request line of all subsequent
-requests to the server. In the example output, users connecting to server will send most
-container/object requests with a host header of example.storage.com and the request line&apos;s version
-and account as v1/AUTH_test.
-
-</para>
- </listitem>
- </itemizedlist>
- <note>
- <para>The authentication tokens are valid for a 24 hour period.
-</para>
- </note>
- </section>
- <section>
- <title>Working with Accounts </title>
- <para>This section describes the list of operations you can perform at the account level of the URL.
-</para>
- <section>
- <title>Displaying Container Information </title>
- <para>You can list the objects of a specific container, or all containers, as needed using GET command. You
-can use the following optional parameters with GET request to refine the results:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Parameter </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>limit </entry>
- <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry>
- </row>
- <row>
- <entry>marker </entry>
- <entry>Returns object names greater in value than the specified marker. </entry>
- </row>
- <row>
- <entry>format </entry>
- <entry>Specify either json or xml to return the respective serialized response. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">To display container information </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>List all the containers of an account using the following command:
-</para>
- <para><programlisting>GET /&lt;apiversion&gt;/&lt;account&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <programlisting>GET /v1/AUTH_test HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 16:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8
-Content-Length: 39
-
-songs
-movies
-documents
-reports</programlisting>
- </listitem>
- </itemizedlist>
- <para>To display container information using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X GET -H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test -k</programlisting></para>
- </section>
- <section>
- <title>Displaying Account Metadata Information </title>
- <para>You can issue HEAD command to the storage service to view the number of containers and the total
-bytes stored in the account.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display containers and storage used, run the following command:
-</para>
- <programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>HEAD /v1/AUTH_test HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 16:52:21 GMT
-Server: Apache
-X-Account-Container-Count: 4
-X-Account-Total-Bytes-Used: 394792</programlisting>
- <para>To display account metadata information using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test -k</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Working with Containers </title>
- <para>This section describes the list of operations you can perform at the container level of the URL.
-</para>
- <section>
- <title> Creating Containers </title>
- <para>You can use PUT command to create containers. Containers are the storage folders for your data.
-The URL encoded name must be less than 256 bytes and cannot contain a forward slash &apos;/&apos; character.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create a container, run the following command:
-</para>
- <programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/ HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>PUT /v1/AUTH_test/pictures/ HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 17:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting>
- <para>To create container using cURL (for the above example), run the following command:
-</para>
- <programlisting>curl -v -X PUT -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting>
- <para>The status code of 201 (Created) indicates that you have successfully created the container. If a
-container with the same name already exists, the status code of 202 is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Objects of a Container </title>
- <para>You can list the objects of a container using GET command. You can use the following optional
-parameters with GET request to refine the results:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Parameter </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>limit </entry>
- <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry>
- </row>
- <row>
- <entry>marker </entry>
- <entry>Returns object names greater in value than the specified marker. </entry>
- </row>
- <row>
- <entry>prefix </entry>
- <entry>Displays the results limited to object names beginning with the substring x. </entry>
- </row>
- <row>
- <entry>path </entry>
- <entry>Returns the object names nested in the pseudo path. </entry>
- </row>
- <row>
- <entry>format </entry>
- <entry>Specify either json or xml to return the respective serialized response. </entry>
- </row>
- <row>
- <entry>delimiter </entry>
- <entry>Returns all the object names nested in the container. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para>To display objects of a container
-</para>
- <itemizedlist>
- <listitem>
- <para>List objects of a specific container using the following command:
-</para>
- </listitem>
- </itemizedlist>
- <programlisting>GET /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;[parm=value] HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>GET /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 15:42:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8
-Content-Length: 139
-
-sample file.jpg
-test-file.pdf
-You and Me.pdf
-Puddle of Mudd.mp3
-Test Reports.doc</programlisting>
- <para>To display objects of a container using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -X GET -H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -k</programlisting>
- </section>
- <section>
- <title>Displaying Container Metadata Information </title>
- <para>You can issue HEAD command to the storage service to view the number of objects in a container and
-the total bytes of all the objects stored in the container.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display list of objects and storage used, run the following command:
-</para>
- <programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,</para>
- <programlisting>HEAD /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 19:52:21 GMT
-Server: Apache
-X-Container-Object-Count: 8
-X-Container-Bytes-Used: 472</programlisting>
- <para>To display list of objects and storage used in a container using cURL (for the above example), run
-the following command:
-</para>
- <programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -k</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Deleting Container </title>
- <para>You can use DELETE command to permanently delete containers. The container must be empty
-before it can be deleted.
-</para>
- <para>You can issue HEAD command to determine if it contains any objects.
-</para>
- <itemizedlist>
- <listitem>
- <para>To delete a container, run the following command:
-</para>
- <programlisting>DELETE /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/ HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,</para>
- <programlisting>DELETE /v1/AUTH_test/pictures HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 17:52:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting>
- <para>To delete a container using cURL (for the above example), run the following command:
-</para>
- <programlisting>curl -v -X DELETE -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting>
- <para>The status code of 204 (No Content) indicates that you have successfully deleted the container. If
-that container does not exist, the status code 404 (Not Found) is displayed, and if the container is
-not empty, the status code 409 (Conflict) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Updating Container Metadata </title>
- <para>You can update the metadata of container using POST operation, metadata keys should be prefixed
-with &apos;x-container-meta&apos;.
-</para>
- <itemizedlist>
- <listitem>
- <para>To update the metadata of the container, run the following command:
-</para>
- <programlisting>POST /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;
-X-Container-Meta-&lt;key&gt;: &lt;new value&gt;
-X-Container-Meta-&lt;key&gt;: &lt;new value&gt;</programlisting>
- <para>For example,
-</para>
- <para><programlisting>POST /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Container-Meta-Zoo: Lion
-X-Container-Meta-Home: Dog
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 20:52:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To update the metadata of the container using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -H &apos;X-Container-Meta-Zoo: Lion&apos; -H &apos;X-Container-Meta-Home: Dog&apos; -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates the container&apos;s metadata is updated successfully. If
-that container does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Working_UFO-Setting_ACLs">
- <title> Setting ACLs on Container </title>
- <para>You can set the container access control list by using POST command on container with <command>x-container-read</command> and <command>x-container-write</command> keys.
-</para>
- <para>The ACL format is <command>[item[,item...]]</command>. Each item can be a group name to give access to or a
-referrer designation to grant or deny based on the HTTP Referer header.
-</para>
- <para>The referrer designation format is: <command>.r:[-]value</command>.
-</para>
- <para>The <command>.r</command> can also be <command>.ref</command>, <command>.referer</command>, or <command>.referrer</command>; though it will be shortened to <command>.r</command> for
-decreased character count usage. The value can be <command>*</command> to specify any referrer host is allowed access. The leading minus sign (-)
-indicates referrer hosts that should be denied access.
-</para>
- <para>Examples of valid ACLs:
-</para>
- <para><programlisting>.r:*
-.r:*,bobs_account,sues_account:sue
-bobs_account,sues_account:sue</programlisting></para>
- <para>Examples of invalid ACLs:</para>
- <para><programlisting>.r:
-.r:-</programlisting></para>
- <para>By default, allowing read access via <command>.r</command> will not allow listing objects in the container but allows
-retrieving objects from the container. To turn on listings, use the <command>.rlistings</command> directive. Also, <command>.r</command>
-designations are not allowed in headers whose names include the word write.
-</para>
- <para>For example, to set all the objects access rights to &quot;public&quot; inside the container using cURL (for the
-above example), run the following command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images
--H &apos;X-Container-Read: .r:*&apos; -k</programlisting></para>
- </section>
- </section>
- <section>
- <title> Working with Objects </title>
- <para>An object represents the data and any metadata for the files stored in the system. Through the REST
-interface, metadata for an object can be included by adding custom HTTP headers to the request
-and the data payload as the request body. Objects name should not exceed 1024 bytes after URL
-encoding.
-</para>
- <para>This section describes the list of operations you can perform at the object level of the URL.
-</para>
- <section>
- <title>Creating or Updating Object </title>
- <para>You can use PUT command to write or update an object&apos;s content and metadata.
-</para>
- <para>You can verify the data integrity by including an MD5 checksum for the object&apos;s data in the ETag
-header. ETag header is optional and can be used to ensure that the object&apos;s contents are stored
-successfully in the storage system.
-</para>
- <para>You can assign custom metadata to objects by including additional HTTP headers on the PUT request.
-The objects created with custom metadata via HTTP headers are identified with the <command>X-Object-Meta-</command> prefix.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create or update an object, run the following command:
-</para>
- <para><programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;
-ETag: da1e100dc9e7becc810986e37875ae38
-Content-Length: 342909
-X-Object-Meta-PIN: 2343</programlisting></para>
- <para>For example,</para>
- <para><programlisting>PUT /v1/AUTH_test/pictures/dog HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-ETag: da1e100dc9e7becc810986e37875ae38
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-ETag: da1e100dc9e7becc810986e37875ae38
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To create or update an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X PUT -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures/dog
--H &apos;Content-Length: 0&apos; -k</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully created or updated the object.
-If there is a missing Content-Length or Content-Type header in the request, the status code of 411
-(Length Required) is displayed. (Optionally) If the MD5 checksum of the data written to the storage
-system does not match the ETag value, the status code of 422 (Unprocessable Entity) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Chunked Transfer Encoding </title>
- <para>You can upload data without knowing the size of the data to be uploaded. You can do this by
-specifying an HTTP header of Transfer-Encoding: chunked and without using a Content-Length
-header.
-</para>
- <para>You can use this feature while doing a DB dump, piping the output through gzip, and then piping the
-data directly into Object Storage without having to buffer the data to disk to compute the file size.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create or update an object, run the following command:
- </para>
- <para><programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;
-Transfer-Encoding: chunked
-X-Object-Meta-PIN: 2343</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>PUT /v1/AUTH_test/pictures/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-Transfer-Encoding: chunked
-X-Object-Meta-PIN: 2343
-19
-A bunch of data broken up
-D
-into chunks.
-0</programlisting>
-
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Copying Object </title>
- <para>You can copy object from one container to another or add a new object and then add reference to
-designate the source of the data from another container.
-</para>
- <para><emphasis role="bold">To copy object from one container to another </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To add a new object and designate the source of the data from another container, run the
-following command:
-</para>
- <para><programlisting>COPY /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;sourceobject&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;
-Destination: /&lt;container&gt;/&lt;destinationobject&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>COPY /v1/AUTH_test/images/dogs HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-Destination: /photos/cats
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To copy an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X COPY -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos; -H &apos;Destination: /photos/cats&apos; -k https://example.storage.com:443/v1/AUTH_test/images/dogs</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully copied the object. If there is a
-missing Content-Length or Content-Type header in the request, the status code of 411 (Length
-Required) is displayed.
-</para>
- <para>You can also use PUT command to copy object by using additional header <command>X-Copy-From: container/obj</command>.
-</para>
- </listitem>
- <listitem>
- <para>To use PUT command to copy an object, run the following command:
-</para>
- <para><programlisting>PUT /v1/AUTH_test/photos/cats HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Copy-From: /images/dogs
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To copy an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X PUT -H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
--H &apos;X-Copy-From: /images/dogs&apos; -k
-https://example.storage.com:443/v1/AUTH_test/photos/cats</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully copied the object.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Object Information </title>
- <para>You can issue GET command on an object to view the object data of the object.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display the content of an object run the following command:</para>
- <para><programlisting>GET /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>GET /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 23:52:21 GMT
-Server: Apache
-Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
-ETag: 8a964ee2a5e88be344f36c22562a6486
-Content-Length: 534210
-[.........]</programlisting></para>
- <para>To display the content of an object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X GET -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para>
- <para>The status code of 200 (Ok) indicates the object&apos;s data is displayed successfully. If that object does
-not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Object Metadata </title>
- <para>You can issue HEAD command on an object to view the object metadata and other standard HTTP
-headers. You must send only authorization token as header.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display the metadata of the object, run the following command:
-</para>
- </listitem>
- </itemizedlist>
- <para><programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>HEAD /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 21:52:21 GMT
-Server: Apache
-Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
-ETag: 8a964ee2a5e88be344f36c22562a6486
-Content-Length: 512000
-Content-Type: text/plain; charset=UTF-8
-X-Object-Meta-House: Cat
-X-Object-Meta-Zoo: Cat
-X-Object-Meta-Home: Cat
-X-Object-Meta-Park: Cat</programlisting></para>
- <para>To display the metadata of the object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates the object&apos;s metadata is displayed successfully. If that
-object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </section>
- <section>
- <title>Updating Object Metadata </title>
- <para>You can issue POST command on an object name only to set or overwrite arbitrary key metadata. You
-cannot change the object&apos;s other headers such as Content-Type, ETag and others using POST
-operation. The POST command will delete all the existing metadata and replace it with the new
-arbitrary key metadata.
-</para>
- <para>You must prefix <emphasis role="bold">X-Object-Meta-</emphasis> to the key names.
-</para>
- <itemizedlist>
- <listitem>
- <para>To update the metadata of an object, run the following command:</para>
- <para><programlisting>POST /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;
-X-Object-Meta-&lt;key&gt;: &lt;new value&gt;
-X-Object-Meta-&lt;key&gt;: &lt;new value&gt;</programlisting>
-</para>
- <para>For example,
-</para>
- <para><programlisting>POST /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Object-Meta-Zoo: Lion
-X-Object-Meta-Home: Dog
-
-HTTP/1.1 202 Accepted
-Date: Wed, 13 Jul 2011 22:52:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To update the metadata of an object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat
--H &apos;X-Object-Meta-Zoo: Lion&apos; -H &apos;X-Object-Meta-Home: Dog&apos; -k</programlisting></para>
- <para>The status code of 202 (Accepted) indicates that you have successfully updated the object&apos;s
-metadata. If that object does not exist, the status code 404 (Not Found) is displayed.
-
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Deleting Object </title>
- <para>You can use DELETE command to permanently delete the object.
-</para>
- <para>The DELETE command on an object will be processed immediately and any subsequent operations
-like GET, HEAD, POST, or DELETE on the object will display 404 (Not Found) error.
-</para>
- <itemizedlist>
- <listitem>
- <para>To delete an object, run the following command:
-</para>
- <para><programlisting>DELETE /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>DELETE /v1/AUTH_test/pictures/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 20:52:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To delete an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X DELETE -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures/cat -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates that you have successfully deleted the object. If that
-object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_commandref.xml b/doc/admin-guide/en-US/admin_commandref.xml
deleted file mode 100644
index df4c78f48..000000000
--- a/doc/admin-guide/en-US/admin_commandref.xml
+++ /dev/null
@@ -1,334 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Com_Ref">
- <title>Command Reference </title>
- <para>This section describes the available commands and includes the
-following sections:
-</para>
- <itemizedlist>
- <listitem>
- <para>gluster Command
-</para>
- <para>Gluster Console Manager (command line interpreter)
-</para>
- </listitem>
- <listitem>
- <para>glusterd Daemon
-</para>
- <para>Gluster elastic volume management daemon
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>gluster Command </title>
- <para><emphasis role="bold">NAME</emphasis>
-</para>
- <para>gluster - Gluster Console Manager (command line interpreter)
-</para>
- <para><emphasis role="bold">SYNOPSIS</emphasis>
-</para>
- <para>To run the program and display the gluster prompt:
-</para>
- <para><emphasis role="bold">gluster</emphasis>
-</para>
- <para>To specify a command directly:
-gluster [COMMANDS] [OPTIONS]
-</para>
- <para><emphasis role="bold">DESCRIPTION</emphasis>
-</para>
- <para>The Gluster Console Manager is a command line utility for elastic volume management. You can run
-the gluster command on any export server. The command enables administrators to perform cloud
-operations such as creating, expanding, shrinking, rebalancing, and migrating volumes without
-needing to schedule server downtime.
-</para>
- <para><emphasis role="bold">COMMANDS</emphasis>
-</para>
- <para><informaltable frame="none">
- <tgroup cols="3">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="cgen1" colsep="0"/>
- <colspec colnum="3" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Command</entry>
- <entry namest="cgen1" nameend="c1">Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Volume</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume info [all | VOLNAME]</entry>
- <entry namest="cgen1" nameend="c1">Displays information about all volumes, or the specified volume.</entry>
- </row>
- <row>
- <entry>volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ...</entry>
- <entry namest="cgen1" nameend="c1">Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).</entry>
- </row>
- <row>
- <entry>volume delete VOLNAME</entry>
- <entry namest="cgen1" nameend="c1">Deletes the specified volume.</entry>
- </row>
- <row>
- <entry>volume start VOLNAME </entry>
- <entry namest="cgen1" nameend="c1">Starts the specified volume.</entry>
- </row>
- <row>
- <entry>volume stop VOLNAME [force] </entry>
- <entry namest="cgen1" nameend="c1">Stops the specified volume. </entry>
- </row>
- <row>
- <entry>volume rename VOLNAME NEW-VOLNAME </entry>
- <entry namest="cgen1" nameend="c1">Renames the specified volume.</entry>
- </row>
- <row>
- <entry>volume help </entry>
- <entry namest="cgen1" nameend="c1">Displays help for the volume command.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Brick</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume add-brick VOLNAME NEW-BRICK ... </entry>
- <entry namest="cgen1" nameend="c1">Adds the specified brick to the specified volume.</entry>
- </row>
- <row>
- <entry>volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status </entry>
- <entry namest="cgen1" nameend="c1">Replaces the specified brick.</entry>
- </row>
- <row>
- <entry>volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ... </entry>
- <entry namest="cgen1" nameend="c1">Removes the specified brick from the specified volume.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Rebalance</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME start</entry>
- <entry namest="cgen1" nameend="c1">Starts rebalancing the specified volume.</entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME stop </entry>
- <entry namest="cgen1" nameend="c1">Stops rebalancing the specified volume. </entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME status </entry>
- <entry namest="cgen1" nameend="c1">Displays the rebalance status of the specified volume.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Log</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume log filename VOLNAME [BRICK] DIRECTORY </entry>
- <entry namest="cgen1" nameend="c1">Sets the log directory for the corresponding volume/brick. </entry>
- </row>
- <row>
- <entry>volume log rotate VOLNAME [BRICK] </entry>
- <entry namest="cgen1" nameend="c1">Rotates the log file for corresponding volume/brick.</entry>
- </row>
- <row>
- <entry>volume log locate VOLNAME [BRICK] </entry>
- <entry namest="cgen1" nameend="c1">Locates the log file for corresponding volume/brick. </entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Peer</emphasis>
- </entry>
- </row>
- <row>
- <entry>peer probe HOSTNAME </entry>
- <entry namest="cgen1" nameend="c1">Probes the specified peer. </entry>
- </row>
- <row>
- <entry>peer detach HOSTNAME </entry>
- <entry namest="cgen1" nameend="c1">Detaches the specified peer. </entry>
- </row>
- <row>
- <entry>peer status </entry>
- <entry namest="cgen1" nameend="c1">Displays the status of peers. </entry>
- </row>
- <row>
- <entry>peer help </entry>
- <entry namest="cgen1" nameend="c1">Displays help for the peer command.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Geo-replication</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume geo-replication MASTER SLAVE start</entry>
- <entry namest="cgen1" nameend="c1">
- <para>Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.</para>
- <para>You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.</para>
- </entry>
- </row>
- <row>
- <entry>volume geo-replication MASTER SLAVE stop</entry>
- <entry namest="cgen1" nameend="c1">
- <para>Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.</para>
- <para>You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
-</para>
- </entry>
- </row>
- <row>
- <entry morerows="10">volume geo-replication MASTER SLAVE config [options]</entry>
- <entry/>
- <entry>Configure geo-replication options between the hosts specified by MASTER and SLAVE. </entry>
- </row>
- <row>
- <entry>gluster-command COMMAND</entry>
- <entry>The path where the gluster command is installed.</entry>
- </row>
- <row>
- <entry>gluster-log-level LOGFILELEVEL</entry>
- <entry>The log level for gluster processes.</entry>
- </row>
- <row>
- <entry>log-file LOGFILE</entry>
- <entry>The path to the geo-replication log file.</entry>
- </row>
- <row>
- <entry>log-level LOGFILELEVEL</entry>
- <entry>The log level for geo-replication.</entry>
- </row>
- <row>
- <entry>remote-gsyncd COMMAND</entry>
- <entry>The path where the gsyncd binary is installed on the remote machine.</entry>
- </row>
- <row>
- <entry>ssh-command COMMAND</entry>
- <entry>The ssh command to use to connect to the remote machine (the default is ssh).</entry>
- </row>
- <row>
- <entry>rsync-command COMMAND</entry>
- <entry>The rsync command to use for synchronizing the files (the default is rsync).</entry>
- </row>
- <row>
- <entry>volume_id= UID</entry>
- <entry>The command to delete the existing master UID for the intermediate/slave node.</entry>
- </row>
- <row>
- <entry>timeout SECONDS</entry>
- <entry>The timeout period.</entry>
- </row>
- <row>
- <entry>sync-jobs N</entry>
- <entry>The number of simultaneous files/directories that can be synchronized.</entry>
- </row>
- <row>
- <entry/>
- <entry>ignore-deletes</entry>
- <entry>If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Other</emphasis>
- </entry>
- </row>
- <row>
- <entry>help</entry>
- <entry/>
- <entry>Display the command options.</entry>
- </row>
- <row>
- <entry>quit</entry>
- <entry/>
- <entry>Exit the gluster command line interface.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">FILES</emphasis>
-
-</para>
- <para>/etc/glusterd/*
-</para>
- <para><emphasis role="bold">SEE ALSO </emphasis></para>
- <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), glusterd(8)</para>
- </section>
- <section>
- <title>glusterd Daemon </title>
- <para><emphasis role="bold">NAME</emphasis>
-</para>
- <para>glusterd - Gluster elastic volume management daemon</para>
- <para><emphasis role="bold">SYNOPSIS</emphasis>
-</para>
- <para>glusterd [OPTION...]
-</para>
- <para><emphasis role="bold">DESCRIPTION</emphasis>
-</para>
- <para>The glusterd daemon is used for elastic volume management. The daemon must be run on all export servers.
-</para>
- <para><emphasis role="bold">OPTIONS</emphasis>
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Option</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Basic</emphasis>
- </entry>
- </row>
- <row>
- <entry>-l=LOGFILE, --log-file=LOGFILE</entry>
- <entry>Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).</entry>
- </row>
- <row>
- <entry>-L=LOGLEVEL, --log-level=LOGLEVEL</entry>
- <entry>Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO). </entry>
- </row>
- <row>
- <entry>--debug</entry>
- <entry>Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.</entry>
- </row>
- <row>
- <entry>-N, --no-daemon</entry>
- <entry>Runs the program in the foreground.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Miscellaneous</emphasis>
- </entry>
- </row>
- <row>
- <entry>-?, --help</entry>
- <entry>Displays this help.</entry>
- </row>
- <row>
- <entry>--usage</entry>
- <entry>Displays a short usage message.</entry>
- </row>
- <row>
- <entry>-V, --version</entry>
- <entry>Prints the program version.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">FILES</emphasis>
-
-</para>
- <para>/etc/glusterd/*
-</para>
- <para><emphasis role="bold">SEE ALSO </emphasis></para>
- <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), gluster(8)</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_console.xml b/doc/admin-guide/en-US/admin_console.xml
deleted file mode 100644
index ebf273935..000000000
--- a/doc/admin-guide/en-US/admin_console.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Using the Gluster Console Manager – Command Line Utility</title>
- <para>The Gluster Console Manager is a single command line utility that simplifies configuration and management of your storage environment. The Gluster Console Manager is similar to the LVM (Logical Volume Manager) CLI or ZFS Command Line Interface, but across multiple storage servers. You can use the Gluster Console Manager online, while volumes are mounted and active. Gluster automatically synchronizes volume configuration information across all Gluster servers.</para>
- <para>Using the Gluster Console Manager, you can create new volumes, start volumes, and stop volumes, as required. You can also add bricks to volumes, remove bricks from existing volumes, as well as change translator settings, among other operations.</para>
- <para>You can also use the commands to create scripts for automation, as well as use the commands as an API to allow integration with third-party applications. </para>
- <para><emphasis role="bold">Running the Gluster Console Manager</emphasis></para>
- <para>You can run the Gluster Console Manager on any GlusterFS server either by invoking the commands or by running the Gluster CLI in interactive mode. You can also use the gluster command remotely using SSH. </para>
- <itemizedlist>
- <listitem>
- <para>To run commands directly: </para>
- <para><command> # gluster peer <replaceable>command</replaceable></command></para>
- <para>For example:</para>
- <para><command> # gluster peer status </command></para>
- </listitem>
- <listitem>
- <para>To run the Gluster Console Manager in interactive mode </para>
- <para><command># gluster</command></para>
- <para>You can execute gluster commands from the Console Manager prompt:</para>
- <para><command> gluster&gt; <replaceable>command</replaceable></command> </para>
- <para>For example, to view the status of the peer server:</para>
- <para># <command>gluster </command></para>
- <para><command>gluster &gt; peer status </command></para>
- <para>Display the status of the peer.</para>
- </listitem>
- </itemizedlist>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_directory_Quota.xml b/doc/admin-guide/en-US/admin_directory_Quota.xml
deleted file mode 100644
index 8a1012a6a..000000000
--- a/doc/admin-guide/en-US/admin_directory_Quota.xml
+++ /dev/null
@@ -1,179 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Dir_Quota">
- <title>Managing Directory Quota </title>
- <para>Directory quotas in GlusterFS allow you to set limits on usage of disk space by directories or volumes.
-The storage administrators can control the disk space utilization at the directory and/or volume
-levels in GlusterFS by setting limits to allocatable disk space at any level in the volume and directory
-hierarchy. This is particularly useful in cloud deployments to facilitate a utility billing model.
- </para>
- <para> <note>
- <para>For now, only Hard limit is supported. Here, the limit cannot be exceeded and attempts to use
-more disk space or inodes beyond the set limit will be denied.
-</para>
- </note></para>
- <para>System administrators can also monitor the resource utilization to limit the storage for the users
-depending on their role in the organization.
-</para>
- <para>You can set the quota at the following levels:
- </para>
- <itemizedlist>
- <listitem>
- <para>Directory level – limits the usage at the directory level
- </para>
- </listitem>
- <listitem>
- <para>Volume level – limits the usage at the volume level
- </para>
- </listitem>
- </itemizedlist>
- <note>
- <para>You can set the disk limit on the directory even if it is not created. The disk limit is enforced
-immediately after creating that directory. For more information on setting disk limit, see <xref linkend="chap-Administration_Guide-Dir_Quota-Set_Replace"/>.
-</para>
- </note>
- <section id="chap-Administration_Guide-Dir_Quota-Enable">
- <title>Enabling Quota </title>
- <para>You must enable Quota to set disk limits.
-</para>
- <para><emphasis role="bold">To enable quota</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Enable the quota using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> enable </command></para>
- <para>For example, to enable quota on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume enable
-Quota is enabled on /test-volume</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Disable">
- <title>Disabling Quota </title>
- <para>You can disable Quota, if needed.
-</para>
- <para><emphasis role="bold">To disable quota:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Disable the quota using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> disable </command></para>
- <para>For example, to disable quota translator on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume disable
-Quota translator is disabled on /test-volume</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Set_Replace">
- <title>Setting or Replacing Disk Limit </title>
- <para>You can create new directories in your storage environment and set the disk limit or set disk limit for
-the existing directories. The directory name should be relative to the volume with the export
-directory/mount being treated as &quot;/&quot;.
-</para>
- <para><emphasis role="bold">To set or replace disk limit</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Set the disk limit using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> limit-usage /<replaceable>directory</replaceable> <replaceable>limit-value</replaceable></command></para>
- <para>For example, to set limit on data directory on test-volume where data is a directory under the
-export directory:
-</para>
- <programlisting># gluster volume quota test-volume limit-usage /data 10GB
-Usage limit has been set on /data</programlisting>
- <para><note>
- <para>In a multi-level directory hierarchy, the strictest disk limit will be considered for enforcement.
-</para>
- </note></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Display">
- <title>Displaying Disk Limit Information </title>
- <para>You can display disk limit information on all the directories on which the limit is set.
-</para>
- <para><emphasis role="bold">To display disk limit information</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Display disk limit information of all the directories on which limit is set, using the following
-command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list</command>
-</para>
- <para>For example, to see the disk limits set on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume list
-
-<emphasis role="underline">
- <emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis>
- </emphasis>
-/Test/data 10 GB 6 GB
-/Test/data1 10 GB 4 GB</programlisting>
- </listitem>
- <listitem>
- <para>Display disk limit information on a particular directory on which limit is set, using the following
-command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list <replaceable>/directory name</replaceable></command>
-</para>
- <para>For example, to see the set limit on /data directory of test-volume:</para>
- <programlisting># gluster volume quota test-volume list /data
-
-<emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis>
-/Test/data 10 GB 6 GB</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Update">
- <title> Updating Memory Cache Size </title>
- <para>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating
-the maximum valid duration of directory sizes in cache, from the time they are populated.
-</para>
- <para>For example: If there are multiple clients writing to a single directory, there are chances that some
-other client might write till the quota limit is exceeded. However, this new file-size may not get
-reflected in the client till size entry in cache has become stale because of timeout. If writes happen
-on this client during this duration, they are allowed even though they would lead to exceeding of
-quota-limits, since size in cache is not in sync with the actual size. When timeout happens, the size
-in cache is updated from servers and will be in sync and no further writes will be allowed. A timeout
-of zero will force fetching of directory sizes from server for every operation that modifies file data
-and will effectively disable directory size caching on the client side.
-</para>
- <para><emphasis role="bold">To update the memory cache size</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Update the memory cache size using the following command:
-</para>
- <para><command># gluster volume set <replaceable>VOLNAME</replaceable> features.quota-timeout<replaceable> value</replaceable></command></para>
- <para>For example, to update the memory cache size for every 5 seconds on test-volume:
-</para>
- <programlisting># gluster volume set test-volume features.quota-timeout 5
-Set volume successful</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Remove">
- <title>Removing Disk Limit </title>
- <para>You can remove a disk limit that has been set, if you no longer want the quota.
-</para>
- <para><emphasis role="bold">To remove disk limit </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Remove disk limit set on a particular directory using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> remove <replaceable>/directory name</replaceable></command>
-</para>
- <para>For example, to remove the disk limit on /data directory of test-volume:
-</para>
- <programlisting># gluster volume quota test-volume remove /data
-Usage limit set on /data is removed</programlisting>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_geo-replication.xml b/doc/admin-guide/en-US/admin_geo-replication.xml
deleted file mode 100644
index b546bb8da..000000000
--- a/doc/admin-guide/en-US/admin_geo-replication.xml
+++ /dev/null
@@ -1,732 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Geo_Rep">
- <title>Managing Geo-replication</title>
- <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from one site to another over Local Area Networks (LANs), Wide Area Networks (WANs), and across the Internet. </para>
- <para>Geo-replication uses a master–slave model, whereby replication and mirroring occurs between the following partners:</para>
- <itemizedlist>
- <listitem>
- <para>Master – a GlusterFS volume </para>
- </listitem>
- <listitem>
- <para>Slave – a slave which can be of the following types: </para>
- <itemizedlist>
- <listitem>
- <para>A local directory which can be represented as file URL like <filename>file:///path/to/dir</filename>. You can use shortened form, for example, <filename> /path/to/dir</filename>.</para>
- </listitem>
- <listitem>
- <para>A GlusterFS Volume - Slave volume can be either a local volume like <filename>gluster://localhost:volname</filename> (shortened form - <filename>:volname</filename>) or a volume served by different host like <filename>gluster://host:volname</filename> (shortened form - <filename>host:volname</filename>).</para>
- </listitem>
- </itemizedlist>
- <note>
- <para> Both of the above types can be accessed remotely using SSH tunnel. To use SSH, add an SSH prefix to either a file URL or gluster type URL. For example, <literal> ssh://root@remote-host:/path/to/dir</literal> (shortened form - <literal>root@remote-host:/path/to/dir</literal>) or <literal>ssh://root@remote-host:gluster://localhost:volname</literal> (shortened form - <literal>root@remote-host::volname</literal>). </para>
- </note>
- </listitem>
- </itemizedlist>
- <para>This section introduces Geo-replication, illustrates the various deployment scenarios, and explains how to configure the system to provide replication and mirroring in your environment. </para>
- <section id="chap-Administration_Guide-Geo_Rep-Replicated_volumes">
- <title>Replicated Volumes vs Geo-replication</title>
- <para>The following table lists the difference between replicated volumes and geo-replication:</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <thead>
- <row>
- <entry>Replicated Volumes</entry>
- <entry>Geo-replication</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Mirrors data across clusters</entry>
- <entry>Mirrors data across geographically distributed clusters </entry>
- </row>
- <row>
- <entry>Provides high-availability</entry>
- <entry>Ensures backing up of data for disaster recovery</entry>
- </row>
- <row>
- <entry>Synchronous replication (each and every file operation is sent across all the bricks)</entry>
- <entry>Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences) </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation">
- <title>Preparing to Deploy Geo-replication</title>
- <para>This section provides an overview of the Geo-replication deployment scenarios, describes how you can check the minimum system requirements, and explores common deployment scenarios.</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options">
- <title>Exploring Geo-replication Deployment Scenarios</title>
- <para>Geo-replication provides an incremental replication service over Local Area Networks (LANs), Wide Area Networks (WANs), and across the Internet. This section illustrates the most common deployment scenarios for Geo-replication, including the following: </para>
- <itemizedlist>
- <listitem>
- <para>Geo-replication over LAN
-</para>
- </listitem>
- <listitem>
- <para>Geo-replication over WAN
-</para>
- </listitem>
- <listitem>
- <para>Geo-replication over the Internet</para>
- </listitem>
- <listitem>
- <para>Multi-site cascading Geo-replication</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Geo-replication over LAN</emphasis></para>
- <para>You can configure Geo-replication to mirror data over a Local Area Network. </para>
- <mediaobject>
- <textobject>
- <phrase>Geo-replication over LAN</phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep_LAN.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Geo-replication over WAN</emphasis></para>
- <para>You can configure Geo-replication to replicate data over a Wide Area Network.</para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Geo-replication over WAN</phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep_WAN.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Geo-replication over Internet</emphasis></para>
- <para>You can configure Geo-replication to mirror data over the Internet.</para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Geo-replication over Internet</phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep03_Internet.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Multi-site cascading Geo-replication</emphasis> </para>
- <para>You can configure Geo-replication to mirror data in a cascading fashion across multiple sites. </para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Multi-site cascading Geo-replication </phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep04_Cascading.png"/>
- </imageobject>
- </mediaobject>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview">
- <title>Geo-replication Deployment Overview</title>
- <para>Deploying Geo-replication involves the following steps:</para>
- <orderedlist>
- <listitem>
- <para>Verify that your environment matches the minimum system requirement. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/>.</para>
- </listitem>
- <listitem>
- <para>Determine the appropriate deployment scenario. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/>.</para>
- </listitem>
- <listitem>
- <para>Start Geo-replication on master and slave systems, as required. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs">
- <title>Checking Geo-replication Minimum Requirements</title>
- <para condition="gfs">Before deploying GlusterFS Geo-replication, verify that your systems match the minimum requirements. </para>
- <para condition="gfs">The following table outlines the minimum requirements for both master and slave nodes within your environment:</para>
- <informaltable frame="all" condition="gfs">
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Component</entry>
- <entry>Master</entry>
- <entry>Slave</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Operating System </entry>
- <entry>GNU/Linux</entry>
- <entry>GNU/Linux</entry>
- </row>
- <row>
- <entry>Filesystem</entry>
- <entry>GlusterFS 3.2 or higher</entry>
- <entry>GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively) </entry>
- </row>
- <row>
- <entry>Python </entry>
- <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry>
- <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry>
- </row>
- <row>
- <entry>Secure shell </entry>
- <entry>OpenSSH version 4.0 (or higher)</entry>
- <entry>SSH2-compliant daemon </entry>
- </row>
- <row>
- <entry>Remote synchronization</entry>
- <entry>rsync 3.0.7 or higher </entry>
- <entry>rsync 3.0.7 or higher </entry>
- </row>
- <row>
- <entry>FUSE </entry>
- <entry>GlusterFS supported versions </entry>
- <entry>GlusterFS supported versions </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment">
- <title>Setting Up the Environment for Geo-replication</title>
- <para><emphasis role="bold">Time Synchronization</emphasis> </para>
- <itemizedlist>
- <listitem>
- <para>On bricks of a geo-replication master volume, all the servers&apos; time must be uniform. You are recommended to set up NTP (Network Time Protocol) service to keep the bricks synchronized in time and avoid out-of-time sync effect.</para>
- <para>For example: In a Replicated volume where brick1 of the master is at 12.20 hrs and brick 2 of the master is at 12.10 hrs with 10 minutes time lag, all the changes in brick2 between this period may go unnoticed during synchronization of files with Slave.</para>
- <para>For more information on setting up NTP, see <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html"/>.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To setup Geo-replication for SSH </emphasis></para>
- <para>Password-less login has to be set up between the host machine (where geo-replication Start command will be issued) and the remote machine (where slave process should be launched through SSH).</para>
- <orderedlist>
- <listitem>
- <para>On the node where geo-replication sessions are to be set up, run the following command:</para>
- <para><command># ssh-keygen -f /etc/glusterd/geo-replication/secret.pem</command>
-</para>
- <para>Press Enter twice to avoid passphrase.
-</para>
- </listitem>
- <listitem>
- <para>Run the following command on master for all the slave hosts: </para>
- <para><command># ssh-copy-id -i /etc/glusterd/geo-replication/secret.pem.pub <varname>user</varname>@<varname>slavehost</varname></command></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave">
- <title>Setting Up the Environment for a Secure Geo-replication Slave</title>
- <para>You can configure a secure slave using SSH so that master is granted a
-restricted access. With GlusterFS, you need not specify
-configuration parameters regarding the slave on the master-side
-configuration. For example, the master does not require the location of
-the rsync program on slave but the slave must ensure that rsync is in
-the PATH of the user which the master connects using SSH. The only
-information that master and slave have to negotiate are the slave-side
-user account, slave&apos;s resources that master uses as slave resources, and
-the master&apos;s public key. Secure access to the slave can be established
-using the following options:</para>
- <itemizedlist>
- <listitem>
- <para>Restricting Remote Command Execution</para>
- </listitem>
- <listitem>
- <para>Using <filename>Mountbroker</filename> for Slaves</para>
- </listitem>
- <listitem>
- <para>Using IP based Access Control</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Backward Compatibility</emphasis> </para>
- <para>Your existing Geo-replication environment will work with GlusterFS,
-except for the following:</para>
- <itemizedlist>
- <listitem>
- <para>The process of secure reconfiguration affects only the glusterfs
-instance on slave. The changes are transparent to master with the
-exception that you may have to change the SSH target to an unprivileged
- account on slave.</para>
- </listitem>
- <listitem>
- <para>The following are some exceptions where this might not work:</para>
- <para><itemizedlist>
- <listitem>
- <para>Geo-replication URLs which specify the slave resource when configuring master will include the following special characters: space, *, ?, [;</para>
- </listitem>
- <listitem>
- <para>Slave must have a running instance of glusterd, even if there is no
-gluster volume among the mounted slave resources (that is, file tree
-slaves are used exclusively) .</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Restricting Remote Command Execution</title>
- <para>If you restrict remote command execution, then the Slave audits commands
-coming from the master and the commands related to the given
-geo-replication session are allowed. The Slave also provides access only
-to the files within the slave resource which can be read or manipulated
-by the Master.</para>
- <para>To restrict remote command execution:</para>
- <orderedlist>
- <listitem>
- <para>Identify the location of the gsyncd helper utility on Slave. This utility is installed in <filename>PREFIX/libexec/glusterfs/gsyncd</filename>, where PREFIX is a compile-time parameter of glusterfs. For example, <filename>--prefix=PREFIX</filename> to the configure script with the following common values<filename> /usr, /usr/local, and /opt/glusterfs/glusterfs_version</filename>.</para>
- </listitem>
- <listitem>
- <para>Ensure that commands invoked from master to slave are passed through the slave&apos;s gsyncd utility. </para>
- <para>You can use either of the following two options:</para>
- <itemizedlist>
- <listitem>
- <para>Set gsyncd with an absolute path as the shell for the account
-which the master connects through SSH. If you need to use a privileged
-account, then set it up by creating a new user with UID 0. </para>
- </listitem>
- <listitem>
- <para>Setup key authentication with command enforcement to gsyncd. You must prefix the copy of master&apos;s public key in the Slave account&apos;s <filename>authorized_keys</filename> file with the following command:</para>
- <para><filename>command=&lt;path to gsyncd&gt;</filename>. </para>
- <para>For example, <command>command=&quot;PREFIX/libexec/glusterfs/gsyncd&quot; ssh-rsa AAAAB3Nza....</command></para>
- </listitem>
- </itemizedlist>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Using Mountbroker for Slaves </title>
- <para><filename>mountbroker</filename> is a new service of glusterd. This service allows an
-unprivileged process to own a GlusterFS mount by registering a label
-(and DSL (Domain-specific language) options ) with glusterd through a
-glusterd volfile. Using CLI, you can send a mount request to glusterd to
-receive an alias (symlink) of the mounted volume.</para>
- <para>On request from the agent, the unprivileged slave agents use the
-mountbroker service of glusterd to set up an auxiliary gluster mount for
-the agent in a special environment which ensures that the agent is only
-allowed to access with special parameters that provide administrative
-level access to the particular volume.</para>
- <para><emphasis role="bold">To setup an auxiliary gluster mount for the agent</emphasis>:</para>
- <orderedlist>
- <listitem>
- <para>Create a new group. For example, <filename>geogroup</filename>.</para>
- </listitem>
- <listitem>
- <para>Create an unprivileged account. For example, <filename> geoaccount</filename>. Make it a member of <filename> geogroup</filename>.</para>
- </listitem>
- <listitem>
- <para>Create a new directory owned by root and with permissions <emphasis role="italic">0711.</emphasis> For example, create a mountbroker-root directory <filename>/var/mountbroker-root</filename>.</para>
- </listitem>
- <listitem>
- <para>Add the following options to the glusterd volfile, assuming the name of the slave gluster volume as <filename>slavevol</filename>:</para>
- <para><command>option mountbroker-root /var/mountbroker-root </command></para>
- <para><command>option mountbroker-geo-replication.geoaccount slavevol</command></para>
- <para><command>option geo-replication-log-group geogroup</command></para>
- <para>If you are unable to locate the glusterd volfile at <filename>/etc/glusterfs/glusterd.vol</filename>, you can create a volfile containing both the default configuration and the above options and place it at <filename>/etc/glusterfs/</filename>. </para>
- <para>A sample glusterd volfile along with default options:</para>
- <para><screen>volume management
- type mgmt/glusterd
- option working-directory /etc/glusterd
- option transport-type socket,rdma
- option transport.socket.keepalive-time 10
- option transport.socket.keepalive-interval 2
- option transport.socket.read-fail-log off
-
- option mountbroker-root /var/mountbroker-root
- option mountbroker-geo-replication.geoaccount slavevol
- option geo-replication-log-group geogroup
-end-volume</screen></para>
- <para>If you host multiple slave volumes on Slave, you can repeat step 2. for each of them and add the following options to the <filename>volfile</filename>:</para>
- <para><screen>option mountbroker-geo-replication.geoaccount2 slavevol2
-option mountbroker-geo-replication.geoaccount3 slavevol3</screen></para>
- </listitem>
- <listitem>
- <para>Setup Master to access Slave as <filename>geoaccount@Slave</filename>.</para>
- <para>You can add multiple slave volumes within the same account (geoaccount) by providing a comma-separated list (without spaces) as the argument of <command>mountbroker-geo-replication.geoaccount</command>. You can also have multiple options of the form <command>mountbroker-geo-replication.*</command>. It is recommended to use one service account per Master machine. For example, if there are multiple slave volumes on Slave for the master machines Master1, Master2, and Master3, then create a dedicated service user on Slave for them by repeating Step 2. for each (like geoaccount1, geoaccount2, and geoaccount3), and then add the following corresponding options to the volfile:
-</para>
- <para><command>option mountbroker-geo-replication.geoaccount1 slavevol11,slavevol12,slavevol13</command></para>
- <para><command>option mountbroker-geo-replication.geoaccount2 slavevol21,slavevol22</command></para>
- <para><command>option mountbroker-geo-replication.geoaccount3 slavevol31</command></para>
- <para>
-Now set up Master1 to ssh to geoaccount1@Slave, etc.
-</para>
- <para>You must restart glusterd after making changes in the configuration to effect the updates. </para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Using IP based Access Control</title>
- <para>You can use the IP based access control method to provide access control for
-the slave resources using IP address. You can use this method for both Slave
-and file tree slaves, but in this section, we are focusing on file tree
-slaves using this method.</para>
- <para>To set access control based on IP address for file tree slaves:</para>
- <orderedlist>
- <listitem>
- <para>Set a general restriction for accessibility of file tree resources:
-</para>
- <para><command># gluster volume geo-replication &apos;/*&apos; config allow-network ::1,127.0.0.1 </command></para>
- <para>This will refuse all requests for spawning slave agents except for
-requests initiated locally.</para>
- </listitem>
- <listitem>
- <para>If you want to lease the file tree at <filename>/data/slave-tree</filename> to Master, enter the following command:</para>
- <para><command># gluster volume geo-replication<varname> /data/slave-tree </varname>config allow-network <varname>MasterIP</varname></command></para>
- <para><varname>MasterIP</varname> is the IP address of Master. The slave agent spawn request from
-master will be accepted if it is executed at <filename>/data/slave-tree</filename>.</para>
- </listitem>
- </orderedlist>
- <para>If the Master side network configuration does not enable the Slave to
-recognize the exact IP address of Master, you can use CIDR notation to
-specify a subnet instead of a single IP address as MasterIP or even
-comma-separated lists of CIDR subnets.</para>
- <para>If you want to extend IP based access control to gluster slaves, use the following command:</para>
- <para><command># gluster volume geo-replication &apos;*&apos; config allow-network ::1,127.0.0.1</command></para>
- </section>
- </section>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting">
- <title>Starting Geo-replication</title>
- <para>This section describes how to configure and start Gluster Geo-replication in your storage environment, and verify that it is functioning correctly. </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Start"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Verify"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Display"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Stop"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Start">
- <title>Starting Geo-replication</title>
- <para>To start Gluster Geo-replication </para>
- <itemizedlist>
- <listitem>
- <para>Start geo-replication between the hosts using the following command:
- </para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir start
-Starting geo-replication session between Volume1 &amp;
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para><note>
- <para>You may need to configure the service before starting Gluster Geo-replication. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/>.</para>
- </note></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Verify">
- <title>Verifying Successful Deployment</title>
- <para>You can use the gluster command to verify the status of Gluster Geo-replication in your environment.</para>
- <para><emphasis role="bold">To verify the status of Gluster Geo-replication</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Verify the status by issuing the following command on host:</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command>
-</para>
- <para>For example:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command>
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Starting....</programlisting>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Display">
- <title>Displaying Geo-replication Status Information</title>
- <para>You can display status information about a specific geo-replication master session, or a particular master-slave session, or all geo-replication sessions, as needed.</para>
- <para><emphasis role="bold">To display geo-replication status information</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information of all geo-replication sessions using the following command:</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Starting....</programlisting></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Display information of a particular master slave session using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command>
-</para>
- <para>For example, to display information of Volume1 and example.com:/data/remote_dir
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command>
-</para>
- <para>The status of the geo-replication between Volume1 and example.com:/data/remote_dir is displayed.</para>
- </listitem>
- <listitem>
- <para>Display information of all geo-replication sessions belonging to a master</para>
- <para><command># gluster volume geo-replication MASTER status</command>
-</para>
- <para>For example, to display information of Volume1</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 ssh://example.com:gluster://127.0.0.1:remove_volume OK
-
-Volume1 ssh://example.com:file:///data/remote_dir OK</programlisting></para>
- <para>The status of a session could be one of the following four:</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Starting</emphasis>: This is the initial phase of the Geo-replication session; it remains in this state for a minute, to make sure no abnormalities are present.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">OK</emphasis>: The geo-replication session is in a stable state.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Faulty</emphasis>: The geo-replication session has witnessed some abnormality and the situation has to be investigated further. For further information, see <xref linkend="chap-Administration_Guide-Troubleshooting"/> section.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Corrupt</emphasis>: The monitor thread which is monitoring the geo-replication session has died. This situation should not occur normally; if it persists, contact Red Hat Support <ulink url="www.redhat.com/support/"/>.</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Configure">
- <title>Configuring Geo-replication</title>
- <para>To configure Gluster Geo-replication </para>
- <itemizedlist>
- <listitem>
- <para>Use the following command at the Gluster command line:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> config [options]</command>
-</para>
- <para>For more information about the options, see <xref linkend="chap-Administration_Guide-Com_Ref"/>.
-</para>
- <para>For example:
-</para>
- <para>To view list of all option/value pair, use the following command:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config</command>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Stop">
- <title>Stopping Geo-replication</title>
- <para>You can use the gluster command to stop Gluster Geo-replication (syncing of data from Master to Slave) in your environment. </para>
- <para><emphasis role="bold">To stop Gluster Geo-replication</emphasis> </para>
- <itemizedlist>
- <listitem>
- <para>Stop geo-replication between the hosts using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command></para>
- <para>For example:
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir stop
-Stopping geo-replication session between Volume1 and
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para>See <xref linkend="chap-Administration_Guide-Com_Ref"/> for more information about the gluster command.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Restoring_Data">
- <title>Restoring Data from the Slave</title>
- <para>You can restore data from the slave to the master volume, whenever the master volume becomes faulty for reasons like hardware failure.
-</para>
- <para>The example in this section assumes that you are using the Master Volume (Volume1) with the following configuration:
-</para>
- <para><programlisting>machine1# gluster volume info
-Type: Distribute
-Status: Started
-Number of Bricks: 2
-Transport-type: tcp
-Bricks:
-Brick1: machine1:/export/dir16
-Brick2: machine2:/export/dir16
-Options Reconfigured:
-geo-replication.indexing: on</programlisting></para>
- <para>The data is syncing from master volume (Volume1) to slave directory (example.com:/data/remote_dir). To view the status of this geo-replication session run the following command on Master: </para>
- <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir OK</programlisting>
- <para><emphasis role="bold">Before Failure</emphasis>
-</para>
- <para>Assume that the Master volume had 100 files and was mounted at /mnt/gluster on one of the client machines (client). Run the following command on Client machine to view the list of files:
-</para>
- <para><programlisting>client# ls /mnt/gluster | wc -l
-100</programlisting></para>
- <para>The slave directory (example.com) will have same data as in the master volume and same can be viewed by running the following command on slave:
-</para>
- <para><programlisting>example.com# ls /data/remote_dir/ | wc -l
-100</programlisting></para>
- <para><emphasis role="bold">After Failure</emphasis>
-</para>
- <para>If one of the bricks (machine2) fails, then the status of Geo-replication session is changed from &quot;OK&quot; to &quot;Faulty&quot;. To view the status of this geo-replication session run the following command on Master:
-</para>
- <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Faulty</programlisting>
- <para>Machine2 has failed and now you can see a discrepancy in the number of files between master and slave. A few files will be missing from the master volume, but they will still be available on the slave, as shown below.
-</para>
- <para>Run the following command on Client:
- </para>
- <para><programlisting>client # ls /mnt/gluster | wc -l
-52</programlisting></para>
- <para>Run the following command on slave (example.com):
-</para>
- <para><programlisting>example.com# ls /data/remote_dir/ | wc -l
-100</programlisting></para>
- <para><emphasis role="bold">To restore data from the slave machine</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Stop all Master&apos;s geo-replication sessions using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume geo-replication Volume1
-example.com:/data/remote_dir stop
-
-Stopping geo-replication session between Volume1 &amp;
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para><note>
- <para>Repeat <command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command>command on all active geo-replication sessions of master volume.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>Replace the faulty brick in the master by using the following command:
-</para>
- <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> start</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 start
-Replace-brick started successfully</programlisting></para>
- </listitem>
- <listitem>
- <para>Commit the migration of data using the following command:
-</para>
- <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> commit force </command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 commit force
-Replace-brick commit successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the migration of brick by viewing the volume info using the following command:
-</para>
- <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume info
-Volume Name: Volume1
-Type: Distribute
-Status: Started
-Number of Bricks: 2
-Transport-type: tcp
-Bricks:
-Brick1: machine1:/export/dir16
-Brick2: machine3:/export/dir16
-Options Reconfigured:
-geo-replication.indexing: on</programlisting></para>
- </listitem>
- <listitem>
- <para>Run rsync command manually to sync data from slave to master volume&apos;s client (mount point).
-</para>
- <para>For example:
-</para>
- <para><command>example.com# rsync -PavhS --xattrs --ignore-existing /data/remote_dir/ client:/mnt/gluster</command></para>
- <para>Verify that the data is synced by using the following command:
-</para>
- <para>On master volume, run the following command:
-</para>
- <para><programlisting>Client # ls | wc -l
-100</programlisting></para>
- <para>On the Slave run the following command:
-</para>
- <para><programlisting>example.com# ls /data/remote_dir/ | wc -l
-100</programlisting></para>
- <para>Now the Master volume and the Slave directory are synced.
-</para>
- </listitem>
- <listitem>
- <para>Restart geo-replication session from master to slave using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start </command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume geo-replication Volume1
-example.com:/data/remote_dir start
-Starting geo-replication session between Volume1 &amp;
-example.com:/data/remote_dir has been successful</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Best_Practices">
- <title>Best Practices</title>
- <para><emphasis role="bold">Manually Setting Time </emphasis></para>
- <para>If you have to change the time on your bricks manually, then you must set uniform time on all bricks. This avoids the out-of-time sync issue described in <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/>. Setting time backward corrupts the geo-replication index, so the recommended way to set the time manually is:
-</para>
- <orderedlist>
- <listitem>
- <para>Stop geo-replication between the master and slave using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop</command>
-</para>
- </listitem>
- <listitem>
- <para>Stop the geo-replication indexing using the following command:
-</para>
- <para><command># gluster volume set <replaceable>MASTER</replaceable> geo-replication.indexing off</command></para>
- </listitem>
- <listitem>
- <para>Set uniform time on
- all bricks.</para>
- </listitem>
- <listitem>
- <para>Restart your geo-replication sessions by using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE </replaceable>start </command></para>
- </listitem>
- </orderedlist>
- <para><emphasis role="bold">Running Geo-replication commands in one system</emphasis>
-</para>
- <para>It is advisable to run the geo-replication commands on one of the bricks in the trusted storage pool. This is because the log files for the geo-replication session are stored on the *Server* where the Geo-replication start is initiated. Hence it is easier to locate the log files when required.
-</para>
- <para><emphasis role="bold">Isolation </emphasis></para>
- <para>Geo-replication slave operation is not sandboxed as of now and is run as a privileged service. So, for security reasons, it is advised that the administrator create a sandbox environment (dedicated machine / dedicated virtual machine / chroot/container type solution) to run the geo-replication slave in it. Enhancement in this regard will be available in a follow-up minor release.
-</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_managing_volumes.xml b/doc/admin-guide/en-US/admin_managing_volumes.xml
deleted file mode 100644
index 0c4d2e922..000000000
--- a/doc/admin-guide/en-US/admin_managing_volumes.xml
+++ /dev/null
@@ -1,735 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Managing_Volumes">
- <title>Managing GlusterFS Volumes</title>
- <para>This section describes how to perform common GlusterFS management operations, including the following: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Expanding"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Shrinking"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Migrating"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Stop"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Self_heal"/></para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Managing_Volumes-Tuning">
- <title>Tuning Volume Options</title>
- <para>You can tune volume options, as needed, while the cluster is online and available. </para>
- <para><note>
- <para>Red Hat recommends you to set server.allow-insecure option to ON if there are too many bricks in each volume or if there are too many services which have already utilized all the privileged ports in the system. Turning this option ON allows ports to accept/reject messages from insecure ports. So, use this option only if your deployment requires it. </para>
- </note></para>
- <para>To tune volume options </para>
- <itemizedlist>
- <listitem>
- <para>Tune volume options using the following command:</para>
- <para><command># gluster volume set <replaceable>VOLNAME OPTION PARAMETER</replaceable></command></para>
- <para>For example, to specify the performance cache size for test-volume:</para>
- <para><programlisting># gluster volume set test-volume performance.cache-size 256MB
-Set volume successful</programlisting></para>
- <para>The following table lists the volume options along with their descriptions and default values: </para>
- <para><note>
- <para>The default options given here are subject to modification at any given time and may not be the same for all versions.</para>
- </note></para>
- <informaltable frame="all">
- <tgroup cols="4">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <colspec colname="c4"/>
- <thead>
- <row>
- <entry>Option</entry>
- <entry>Description</entry>
- <entry>Default Value</entry>
- <entry>Available Options</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>auth.allow</entry>
- <entry>IP addresses of the clients which should be allowed to access the volume. </entry>
- <entry>* (allow all)</entry>
- <entry>Valid IP address which includes wild card patterns including *, such as 192.168.1.*</entry>
- </row>
- <row>
- <entry>auth.reject</entry>
- <entry>IP addresses of the clients which should be denied to access the volume. </entry>
- <entry>NONE (reject none) </entry>
- <entry>Valid IP address which includes wild card patterns including *, such as 192.168.2.*</entry>
- </row>
- <row>
- <entry>client.grace-timeout</entry>
- <entry>Specifies the duration for the lock state to be maintained on the client after a network disconnection.</entry>
- <entry>10 </entry>
- <entry>10 - 1800 secs</entry>
- </row>
- <row>
- <entry>cluster.self-heal-window-size</entry>
- <entry>Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. </entry>
- <entry>16 </entry>
- <entry>0 - 1025 blocks</entry>
- </row>
- <row>
- <entry>cluster.data-self-heal-algorithm</entry>
- <entry>Specifies the type of self-heal. If you set the option as &quot;full&quot;, the entire file is copied from source to destinations. If the option is set to &quot;diff&quot; the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the &quot;diff&quot; algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than &quot;diff&quot; which has to read checksums and then read and write. </entry>
- <entry>reset</entry>
- <entry>full | diff | reset</entry>
- </row>
- <row>
- <entry>cluster.min-free-disk</entry>
- <entry>Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. </entry>
- <entry>10%</entry>
- <entry>Percentage of required minimum free disk space</entry>
- </row>
- <row>
- <entry>cluster.stripe-block-size</entry>
- <entry>Specifies the size of the stripe unit that will be read from or written to. </entry>
- <entry>128 KB (for all files)</entry>
- <entry>size in bytes</entry>
- </row>
- <row>
- <entry>cluster.self-heal-daemon</entry>
- <entry>Allows you to turn-off proactive self-heal on replicated volumes.</entry>
- <entry>on</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>diagnostics.brick-log-level</entry>
- <entry>Changes the log-level of the bricks. </entry>
- <entry>INFO </entry>
- <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry>
- </row>
- <row>
- <entry>diagnostics.client-log-level</entry>
- <entry>Changes the log-level of the clients. </entry>
- <entry>INFO </entry>
- <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry>
- </row>
- <row>
- <entry>diagnostics.latency-measurement</entry>
- <entry>Statistics related to the latency of each operation would be tracked. </entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>diagnostics.dump-fd-stats</entry>
- <entry>Statistics related to file-operations would be tracked.</entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>feature.read-only</entry>
- <entry>Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it.</entry>
- <entry>off</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>features.lock-heal</entry>
- <entry>Enables self-healing of locks when the network disconnects.</entry>
- <entry>on</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>features.quota-timeout</entry>
- <entry>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. </entry>
- <entry>0</entry>
- <entry>0 - 3600 secs</entry>
- </row>
- <row>
- <entry>geo-replication.indexing</entry>
- <entry>Use this option to automatically sync the changes in the filesystem from Master to Slave.</entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>network.frame-timeout</entry>
- <entry>The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. </entry>
- <entry>1800 (30 mins) </entry>
- <entry>1800 secs</entry>
- </row>
- <row>
- <entry>network.ping-timeout</entry>
- <entry>The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. <para>This reconnect is a very expensive operation and should be avoided.
-</para></entry>
- <entry>42 Secs</entry>
- <entry>42 Secs</entry>
- </row>
- <row>
- <entry>nfs.enable-ino32</entry>
- <entry>For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. Applications that will benefit are those that were either: <para>* Built 32-bit and run on 32-bit machines.</para><para>* Built 32-bit on 64-bit systems.</para><para>* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.</para><para>Either of the conditions above can lead to application on Linux NFS clients failing with &quot;Invalid argument&quot; or &quot;Value too large for defined data type&quot; errors.</para></entry>
- <entry>off</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>nfs.volume-access </entry>
- <entry>Set the access type for the specified sub-volume. </entry>
- <entry>read-write </entry>
- <entry>read-write|read-only </entry>
- </row>
- <row>
- <entry>nfs.trusted-write </entry>
- <entry>If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. <para>In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.</para></entry>
- <entry> off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>nfs.trusted-sync</entry>
- <entry> All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. </entry>
- <entry>off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>nfs.export-dir </entry>
- <entry>By default, all sub-volumes of NFS are exported as individual exports. Now, this option allows you to export only the specified subdirectory or subdirectories in the volume. This option can also be used in conjunction with nfs3.export-volumes option to restrict exports only to the subdirectories specified through this option. You must provide an absolute path.</entry>
- <entry>Enabled for all sub directories.</entry>
- <entry>Enable | Disable </entry>
- </row>
- <row>
- <entry>nfs.export-volumes </entry>
- <entry>Enable/Disable exporting entire volumes. If used in conjunction with nfs3.export-dir, this option allows setting up only subdirectories as exports. </entry>
- <entry>on</entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-unix </entry>
- <entry>Enable/Disable the AUTH_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required.</entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-null </entry>
- <entry>Enable/Disable the AUTH_NULL authentication type. It is not recommended to change the default value for this option. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-allow&lt;IP- Addresses&gt; </entry>
- <entry>Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes.</entry>
- <entry>Reject All </entry>
- <entry>IP address or Host name </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-reject IP- Addresses </entry>
- <entry>Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes.</entry>
- <entry>Reject All </entry>
- <entry>IP address or Host name </entry>
- </row>
- <row>
- <entry>nfs.ports-insecure </entry>
- <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry>
- <entry>off</entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.addr-namelookup </entry>
- <entry>Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.* filters. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.register-with- portmap </entry>
- <entry>For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.port &lt;PORT- NUMBER&gt; </entry>
- <entry>Use this option on systems that need Gluster NFS to be associated with a non-default port number. </entry>
- <entry>38465- 38467 </entry>
- <entry/>
- </row>
- <row>
- <entry>nfs.disable</entry>
- <entry>Turn-off volume being exported by NFS</entry>
- <entry> off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>performance.write-behind-window-size </entry>
- <entry>Size of the per-file write-behind buffer.</entry>
- <entry>1 MB </entry>
- <entry>Write-behind cache size </entry>
- </row>
- <row>
- <entry>performance.io-thread-count </entry>
- <entry>The number of threads in IO threads translator. </entry>
- <entry>16</entry>
- <entry>0 - 65 </entry>
- </row>
- <row>
- <entry>performance.flush-behind </entry>
- <entry>If this option is set ON, instructs the write-behind translator to perform flush in the background, by returning success (or any errors, if any of the previous writes failed) to the application even before flush is sent to the backend filesystem. </entry>
- <entry>On </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>performance.cache-max-file-size </entry>
- <entry>Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. </entry>
- <entry>2 ^ 64 -1 bytes </entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>performance.cache-min-file-size </entry>
- <entry> Sets the minimum file size cached by the io-cache translator. Values same as &quot;max&quot; above.</entry>
- <entry>0B</entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>performance.cache-refresh-timeout </entry>
- <entry>The cached data for a file will be retained till &apos;cache-refresh-timeout&apos; seconds, after which data re-validation is performed. </entry>
- <entry>1 sec </entry>
- <entry>0 - 61 </entry>
- </row>
- <row>
- <entry>performance.cache-size </entry>
- <entry>Size of the read cache.</entry>
- <entry> 32 MB </entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>server.allow-insecure </entry>
- <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry>
- <entry>on </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>server.grace-timeout</entry>
- <entry>Specifies the duration for the lock state to be maintained on the server after a network disconnection.</entry>
- <entry>10</entry>
- <entry>10 - 1800 secs</entry>
- </row>
- <row>
- <entry>server.statedump-path </entry>
- <entry>Location of the state dump file. </entry>
- <entry>/tmp directory of the brick </entry>
- <entry>New directory path</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <para>You can view the changed volume options using the <command># gluster volume info <replaceable>VOLNAME</replaceable></command> command. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/>.</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Expanding">
- <title>Expanding Volumes</title>
- <para>You can expand volumes, as needed, while the cluster is online and available. For example, you might want to add a brick to a distributed volume, thereby increasing the distribution and adding to the capacity of the GlusterFS volume. </para>
- <para>Similarly, you might want to add a group of bricks to a distributed replicated volume, increasing the capacity of the GlusterFS volume. </para>
- <para><note>
- <para>When expanding distributed replicated and distributed striped volumes, you need to add a number of bricks that is a multiple of the replica or stripe count. For example, to expand a distributed replicated volume with a replica count of 2, you need to add bricks in multiples of 2 (such as 4, 6, 8, etc.). </para>
- </note></para>
- <para><emphasis role="bold">To expand a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>On the first server in the cluster, probe the server to which you want to add the new brick using the following command:</para>
- <para><command># gluster peer probe <replaceable>HOSTNAME</replaceable></command></para>
- <para>For example:</para>
- <para><programlisting># gluster peer probe server4
-Probe successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Add the brick using the following command: </para>
- <para><command># gluster volume add-brick <replaceable>VOLNAME NEW-BRICK</replaceable></command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume add-brick test-volume server4:/exp4
-Add Brick successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Check the volume information using the following command: </para>
- <para><command># gluster volume info </command></para>
- <para>The command displays information similar to the following:</para>
- <para><programlisting>Volume Name: test-volume
-Type: Distribute
-Status: Started
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Rebalance the volume to ensure that all files are distributed to the new brick.</para>
- <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Shrinking">
- <title>Shrinking Volumes</title>
- <para>You can shrink volumes, as needed, while the cluster is online and available. For example, you might need to remove a brick that has become inaccessible in a distributed volume due to hardware or network failure. </para>
- <para><note>
- <para>Data residing on the brick that you are removing will no longer be accessible at the Gluster mount point. Note however that only the configuration information is removed - you can continue to access the data directly from the brick, as necessary. </para>
- </note></para>
- <para>When shrinking distributed replicated and distributed striped volumes, you need to remove a number of bricks that is a multiple of the replica or stripe count. For example, to shrink a distributed striped volume with a stripe count of 2, you need to remove bricks in multiples of 2 (such as 4, 6, 8, etc.). In addition, the bricks you are trying to remove must be from the same sub-volume (the same replica or stripe set). </para>
- <para><emphasis role="bold">To shrink a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>Remove the brick using the following command:</para>
- <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command> <command>start</command></para>
- <para>For example, to remove server2:/exp2:</para>
- <para><programlisting># gluster volume remove-brick test-volume server2:/exp2
-
-Removing brick(s) can result in data loss. Do you want to Continue? (y/n)</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter &quot;y&quot; to confirm the operation. The command displays the following message indicating that the remove brick operation is successfully started: </para>
- <para><programlisting>Remove Brick successful </programlisting></para>
- </listitem>
- <listitem>
- <para>(Optional) View the status of the remove brick operation using the following command:</para>
- <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command><command> status</command></para>
- <para>For example, to view the status of remove brick operation on server2:/exp2 brick:</para>
- <para><screen># gluster volume remove-brick test-volume server2:/exp2 status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 34 340 162 in progress</screen></para>
- </listitem>
- <listitem>
- <para>Check the volume information using the following command: </para>
- <para><command># gluster volume info </command></para>
- <para>The command displays information similar to the following:</para>
- <para><programlisting># gluster volume info
-Volume Name: test-volume
-Type: Distribute
-Status: Started
-Number of Bricks: 3
-Bricks:
-Brick1: server1:/exp1
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Rebalance the volume to ensure that all files are distributed to the new brick.</para>
- <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Migrating">
- <title>Migrating Volumes</title>
- <para>You can migrate the data from one brick to another, as needed, while the cluster is online and available. </para>
- <para><emphasis role="bold">To migrate a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>Make sure the new brick, server5 in this example, is successfully added to the cluster.</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Migrate the data from one brick to another using the following command:</para>
- <para><command> # gluster volume replace-brick <code>VOLNAME</code><code> BRICK</code><code> NEW-BRICK</code> start</command></para>
- <para>For example, to migrate the data in server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 start
-Replace brick start operation successful</programlisting></para>
- <para><note>
- <para>You need to have the FUSE package installed on the server on which you are running the replace-brick command for the command to work.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>To pause the migration operation, if needed, use the following command: </para>
- <para><command># gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname> pause </command></para>
- <para>For example, to pause the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 pause
-Replace brick pause operation successful</programlisting></para>
- </listitem>
- <listitem>
- <para>To abort the migration operation, if needed, use the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>abort </command></para>
- <para>For example, to abort the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 abort
-Replace brick abort operation successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Check the status of the migration operation using the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>status </command></para>
- <para>For example, to check the data migration status from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 status
-Current File = /usr/src/linux-headers-2.6.31-14/block/Makefile
-Number of files migrated = 10567
-Migration complete</programlisting></para>
- <para>The status command shows the current file being migrated along with the current total number of files migrated. After completion of migration, it displays Migration complete.</para>
- </listitem>
- <listitem>
- <para>Commit the migration of data from one brick to another using the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>commit </command></para>
- <para>For example, to commit the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 commit
-replace-brick commit successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the migration of brick by viewing the volume info using the following command: </para>
- <para><command># gluster volume info <code>VOLNAME</code></command></para>
- <para>For example, to check the volume information of new brick server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume info test-volume
-Volume Name: test-volume
-Type: Replicate
-Status: Started
-Number of Bricks: 4
-Transport-type: tcp
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server4:/exp4
-Brick4: server5:/exp5
-
-The new volume details are displayed.
-</programlisting></para>
- <para>The new volume details are displayed.</para>
- <para>In the above example, there were previously bricks 1, 2, 3, and 4; brick 3 has now been replaced by brick 5.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing">
- <title>Rebalancing Volumes</title>
- <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. New directories created after expanding or shrinking of the volume will be evenly distributed automatically. For all the existing directories, the distribution can be fixed by rebalancing the layout and/or data. </para>
- <para>This section describes how to rebalance GlusterFS volumes in your storage environment, using the following common scenarios: </para>
- <itemizedlist>
- <listitem>
- <para>Fix Layout - Fixes the layout changes so that the files can actually go to newly added nodes. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout"/>. </para>
- </listitem>
- <listitem>
- <para>Fix Layout and Migrate Data - Rebalances volume by fixing the layout changes and migrating the existing data. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate"/>.</para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout">
- <title>Rebalancing Volume to Fix Layout Changes</title>
- <para>Fixing the layout is necessary because the layout structure is static for a given directory. In a scenario where new bricks have been added to the existing volume, newly created files in existing directories will still be distributed only among the old bricks. The <command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start </command>command will fix the layout information so that the files can also go to newly added nodes. When this command is issued, all the file stat information which is already cached will get revalidated. </para>
- <para>A fix-layout rebalance will only fix the layout changes and does not migrate data. If you want to migrate the existing data, use the <command># gluster volume rebalance <varname>VOLNAME</varname> start </command> command to rebalance data among the servers. </para>
- <para><emphasis role="bold">To rebalance a volume to fix layout changes</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start the rebalance operation on any one of the servers using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume fix-layout start
-Starting rebalance on volume test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate">
- <title>Rebalancing Volume to Fix Layout and Migrate Data</title>
- <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. </para>
- <para><emphasis role="bold">To rebalance a volume to fix layout and migrate the existing data</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start the rebalance operation on any one of the servers using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> start</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume start
-Starting rebalancing on volume test-volume has been successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Start the migration operation forcefully on any one of the servers using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> start force</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume start force
-Starting rebalancing on volume test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Status of Rebalance Operation</title>
- <para>You can display the status information about rebalance volume operation, as needed. </para>
- <para><emphasis role="bold">To view status of rebalance volume</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Check the status of the rebalance operation, using the following command:</para>
- <para><command># gluster volume rebalance <replaceable>VOLNAME</replaceable> status</command></para>
- <para>For example:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 416 1463 312 in progress</screen></para>
- <para>The time to complete the rebalance operation depends on the number of files on the volume along with the corresponding file sizes. Continue checking the rebalance status, verifying that the number of files rebalanced or total files scanned keeps increasing.</para>
- <para>For example, running the status command again might display a result similar to the following:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 498 1783 378 in progress</screen></para>
- <para>The rebalance status displays the following when the rebalance is complete:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed</screen></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Stopping Rebalance Operation</title>
- <para>You can stop the rebalance operation, as needed.</para>
- <para><emphasis role="bold">To stop rebalance</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Stop the rebalance operation using the following command:</para>
- <para><command># gluster volume rebalance <replaceable>VOLNAME</replaceable> stop</command></para>
- <para>For example:</para>
- <para><screen># gluster volume rebalance test-volume stop
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped
-Stopped rebalance process on volume test-volume </screen></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Stop">
- <title>Stopping Volumes</title>
- <para>To stop a volume</para>
- <orderedlist>
- <listitem>
- <para>Stop the volume using the following command:
-
-</para>
- <para><command># gluster volume stop <varname>VOLNAME </varname></command></para>
- <para>For example, to stop test-volume:</para>
- <para><programlisting># gluster volume stop test-volume
-Stopping volume will make its data inaccessible. Do you want to continue? (y/n)
-</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter <userinput>y</userinput> to confirm the operation. The output of the command displays the following:
-
-</para>
- <programlisting>Stopping volume test-volume has been successful</programlisting>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Delete">
- <title>Deleting Volumes</title>
- <para>To delete a volume </para>
- <orderedlist>
- <listitem>
- <para>Delete the volume using the following command:</para>
- <para><command># gluster volume delete <varname>VOLNAME</varname></command></para>
- <para>For example, to delete test-volume:</para>
- <para><programlisting># gluster volume delete test-volume
-Deleting volume will erase all information about the volume. Do you want to continue? (y/n)</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter <userinput role="bold">y</userinput> to confirm the operation. The command displays the following:</para>
- <para><programlisting>Deleting volume test-volume has been successful</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Self_heal">
- <title>Triggering Self-Heal on Replicate</title>
- <para>In replicate module, previously you had to manually trigger a self-heal when a brick goes offline and comes back online, to bring all the replicas in sync. Now the pro-active self-heal daemon runs in the background, diagnoses issues and automatically initiates self-healing every 10 minutes on the files which require<emphasis role="italic"> healing</emphasis>. </para>
- <para>You can view the list of files that need <emphasis role="italic">healing</emphasis>, the list of files which are currently/previously <emphasis role="italic">healed</emphasis>, list of files which are in split-brain state, and you can manually trigger self-heal on the entire volume or only on the files which need <emphasis role="italic">healing</emphasis>.</para>
- <itemizedlist>
- <listitem>
- <para>Trigger self-heal only on the files which require <emphasis role="italic">healing</emphasis>:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to trigger self-heal on files which require <emphasis role="italic">healing</emphasis> of test-volume:</para>
- <para><screen># gluster volume heal test-volume
-Heal operation on volume test-volume has been successful</screen></para>
- </listitem>
- <listitem>
- <para>Trigger self-heal on all the files of a volume:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>full</command></para>
- <para>For example, to trigger self-heal on all the files of test-volume:</para>
- <para><screen># gluster volume heal test-volume full
-Heal operation on volume test-volume has been successful</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files that need <emphasis role="italic">healing</emphasis>:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info</command></para>
- <para>For example, to view the list of files on test-volume that need <emphasis role="italic">healing</emphasis>:</para>
- <para><screen># gluster volume heal test-volume info
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1
-Number of entries: 101
-/95.txt
-/32.txt
-/66.txt
-/35.txt
-/18.txt
-/26.txt
-/47.txt
-/55.txt
-/85.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files that are self-healed:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info healed</command> </para>
- <para>For example, to view the list of files on test-volume that are self-healed:</para>
- <para><screen># gluster volume heal test-volume info healed
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1
-Number of entries: 69
-/99.txt
-/93.txt
-/76.txt
-/11.txt
-/27.txt
-/64.txt
-/80.txt
-/19.txt
-/41.txt
-/29.txt
-/37.txt
-/46.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files of a particular volume on which the self-heal failed:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info failed</command> </para>
- <para>For example, to view the list of files of test-volume that are not self-healed:</para>
- <para><screen># gluster volume heal test-volume info failed
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick server2:/gfs/test-volume_3
-Number of entries: 72
-/90.txt
-/95.txt
-/77.txt
-/71.txt
-/87.txt
-/24.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files of a particular volume which are in split-brain state:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info split-brain</command> </para>
- <para>For example, to view the list of files of test-volume which are in split-brain state:</para>
- <para><screen># gluster volume heal test-volume info split-brain
-Brick server1:/gfs/test-volume_2
-Number of entries: 12
-/83.txt
-/28.txt
-/69.txt
-...
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_2
-Number of entries: 12
-/83.txt
-/28.txt
-/69.txt
-...</screen></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_monitoring_workload.xml b/doc/admin-guide/en-US/admin_monitoring_workload.xml
deleted file mode 100644
index e85bc51d8..000000000
--- a/doc/admin-guide/en-US/admin_monitoring_workload.xml
+++ /dev/null
@@ -1,878 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Monitor_Workload">
- <title>Monitoring your GlusterFS Workload</title>
- <para>You can monitor the GlusterFS volumes on different parameters. Monitoring volumes helps in capacity planning and performance tuning tasks of the GlusterFS volume. Using this information, you can identify and troubleshoot issues. </para>
- <para>You can use Volume Top and Profile commands to view the performance and identify bottlenecks/hotspots of each brick of a volume. This helps system administrators to get vital performance information whenever performance needs to be probed. </para>
- <para>You can also perform statedump of the brick processes and nfs server process of a volume, and also view volume status and volume information. </para>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile">
- <title>Running GlusterFS Volume Profile Command</title>
- <para>GlusterFS Volume Profile command provides an interface to get the per-brick I/O information for each File Operation (FOP) of a volume. The per brick information helps in identifying bottlenecks in the storage system.
-</para>
- <para>This section describes how to run GlusterFS Volume Profile command by performing the following operations:
-</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Start"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Display"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Stop"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Start">
- <title>Start Profiling</title>
- <para>You must start the Profiling to view the File Operation information for each brick.
-</para>
- <para><emphasis role="bold">To start profiling: </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start profiling using the following command:
- </para>
- </listitem>
- </itemizedlist>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> start </command></para>
- <para>For example, to start profiling on test-volume:
-</para>
- <para><programlisting># gluster volume profile test-volume start
-Profiling started on test-volume</programlisting></para>
- <para>When profiling on the volume is started, the following additional options are displayed in the Volume Info:
-</para>
- <para><programlisting>diagnostics.count-fop-hits: on
-
-diagnostics.latency-measurement: on</programlisting></para>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Display">
- <title>Displaying the I/O Information</title>
- <para>You can view the I/O information of each brick.
-</para>
- <para>To display I/O information:
-</para>
- <itemizedlist>
- <listitem>
- <para>Display the I/O information using the following command:
-</para>
- </listitem>
- </itemizedlist>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> info</command>
-
-</para>
- <para>For example, to see the I/O information on test-volume:
-
-</para>
- <para><programlisting># gluster volume profile test-volume info
-Brick: Test:/export/2
-Cumulative Stats:
-
-Block 1b+ 32b+ 64b+
-Size:
- Read: 0 0 0
- Write: 908 28 8
-
-Block 128b+ 256b+ 512b+
-Size:
- Read: 0 6 4
- Write: 5 23 16
-
-Block 1024b+ 2048b+ 4096b+
-Size:
- Read: 0 52 17
- Write: 15 120 846
-
-Block 8192b+ 16384b+ 32768b+
-Size:
- Read: 52 8 34
- Write: 234 134 286
-
-Block 65536b+ 131072b+
-Size:
- Read: 118 622
- Write: 1341 594
-
-
-%-latency Avg- Min- Max- calls Fop
- latency Latency Latency
-___________________________________________________________
-4.82 1132.28 21.00 800970.00 4575 WRITE
-5.70 156.47 9.00 665085.00 39163 READDIRP
-11.35 315.02 9.00 1433947.00 38698 LOOKUP
-11.88 1729.34 21.00 2569638.00 7382 FXATTROP
-47.35 104235.02 2485.00 7789367.00 488 FSYNC
-
-------------------
-
-------------------
-
-Duration : 335
-
-BytesRead : 94505058
-
-BytesWritten : 195571980</programlisting></para>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Stop">
- <title>Stop Profiling</title>
- <para>You can stop profiling the volume, if you do not need profiling information anymore.
-</para>
- <para><emphasis role="bold">To stop profiling</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Stop profiling using the following command:
-</para>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> stop</command>
-</para>
- <para>For example, to stop profiling on test-volume:</para>
- <para><command># gluster volume profile <replaceable>test-volume</replaceable> stop</command> </para>
- <para><computeroutput>Profiling stopped on test-volume</computeroutput></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top">
- <title> Running GlusterFS Volume TOP Command </title>
- <para>GlusterFS Volume Top command allows you to view the glusterfs bricks’ performance metrics like
-read, write, file open calls, file read calls, file write calls, directory open calls, and directory read
-calls. The top command displays up to 100 results.
-</para>
- <para>This section describes how to run and view the results for the following GlusterFS Top commands:
-</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Read"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Write"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count">
- <title>Viewing Open fd Count and Maximum fd Count </title>
- <para>You can view both current open fd count (list of files that are currently the most opened and the
-count) on the brick and the maximum open fd count (count of files that are currently open and
-the count of maximum number of files opened at any given point of time, since the servers are up
-and running). If the brick name is not specified, then open fd metrics of all the bricks belonging to
-the volume will be displayed.
-</para>
- <para><emphasis role="bold">To view open fd count and maximum fd count: </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>View open fd count and maximum fd count using the following command:</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> open [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command>
-</para>
- <para>For example, to view open fd count and maximum fd count on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable> and list top 10 open calls:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> open brick <replaceable>server:/export/</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick: server:/export/dir1 </computeroutput></para>
- <para><computeroutput>Current open fd&apos;s: 34 Max open fd&apos;s: 209 </computeroutput><programlisting> ==========Open file stats========
-
-open file name
-call count
-
-2 /clients/client0/~dmtmp/PARADOX/
- COURSES.DB
-
-11 /clients/client0/~dmtmp/PARADOX/
- ENROLL.DB
-
-11 /clients/client0/~dmtmp/PARADOX/
- STUDENTS.DB
-
-10 /clients/client0/~dmtmp/PWRPNT/
- TIPS.PPT
-
-10 /clients/client0/~dmtmp/PWRPNT/
- PCBENCHM.PPT
-
-9 /clients/client7/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client1/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client2/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client0/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client8/~dmtmp/PARADOX/
- STUDENTS.DB</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Read">
- <title>Viewing Highest File Read Calls </title>
- <para>You can view highest read calls on each brick. If brick name is not specified, then by default, list of
-100 files will be displayed.
-</para>
- <para><emphasis role="bold">To view highest file Read calls:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View highest file Read calls using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest Read calls on brick server:/export of test-volume:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> read brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick:</computeroutput> <replaceable>server:/export/dir1</replaceable><programlisting> ==========Read file stats========
-
-read filename
-call count
-
-116 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-64 /clients/client0/~dmtmp/SEED/MEDIUM.FIL
-
-54 /clients/client2/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client6/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client5/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client3/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client4/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client9/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client8/~dmtmp/SEED/LARGE.FIL</programlisting> </para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Write">
- <title>Viewing Highest File Write Calls </title>
- <para>You can view list of files which has highest file write calls on each brick. If brick name is not
-specified, then by default, list of 100 files will be displayed.
-</para>
- <para><emphasis role="bold">To view highest file Write calls:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View highest file Write calls using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest Write calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> write brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Write file stats========
-write call count filename
-
-83 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client7/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client1/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client2/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client8/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client5/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client4/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client6/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client3/~dmtmp/SEED/LARGE.FIL</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls">
- <title>Viewing Highest Open Calls on Directories </title>
- <para>You can view list of files which has highest open calls on directories of each brick. If brick name is
-not specified, then the metrics of all the bricks belonging to that volume will be displayed.
-</para>
- <para>To view list of open calls on each directory</para>
- <itemizedlist>
- <listitem>
- <para>View list of open calls on each directory using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> opendir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view open calls on brick server:/export/ of test-volume:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> opendir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Directory open stats========
-
-Opendir count directory name
-
-1001 /clients/client0/~dmtmp
-
-454 /clients/client8/~dmtmp
-
-454 /clients/client2/~dmtmp
-
-454 /clients/client6/~dmtmp
-
-454 /clients/client5/~dmtmp
-
-454 /clients/client9/~dmtmp
-
-443 /clients/client0/~dmtmp/PARADOX
-
-408 /clients/client1/~dmtmp
-
-408 /clients/client7/~dmtmp
-
-402 /clients/client4/~dmtmp</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls">
- <title>Viewing Highest Read Calls on Directory </title>
- <para>You can view list of files which has highest directory read calls on each brick. If brick name is not
-specified, then the metrics of all the bricks belonging to that volume will be displayed.
-</para>
- <para><emphasis role="bold">To view list of highest directory read calls on each brick</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of highest directory read calls on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> readdir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest directory read calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>:</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> readdir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command> </para>
- <para><code>Brick: <replaceable>server:/export/dir1</replaceable></code><programlisting>==========Directory readdirp stats========
-
-readdirp count directory name
-
-1996 /clients/client0/~dmtmp
-
-1083 /clients/client0/~dmtmp/PARADOX
-
-904 /clients/client8/~dmtmp
-
-904 /clients/client2/~dmtmp
-
-904 /clients/client6/~dmtmp
-
-904 /clients/client5/~dmtmp
-
-904 /clients/client9/~dmtmp
-
-812 /clients/client1/~dmtmp
-
-812 /clients/client7/~dmtmp
-
-800 /clients/client4/~dmtmp</programlisting>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf">
- <title>Viewing List of Read Performance on each Brick </title>
- <para>You can view the read throughput of files on each brick. If brick name is not specified, then the
-metrics of all the bricks belonging to that volume will be displayed. The output will be the read
-throughput.
-</para>
- <para><programlisting> ==========Read throughput file stats========
-
-read filename Time
-through
-put(MBp
-s)
-
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- TRIDOTS.POT 15:38:36.894610
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- PCBENCHM.PPT 15:38:39.815310
-2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:52:53.631499
-
-2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.926198
-
-2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.930445
-
-2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- COURSES.X04 15:38:40.549919
-
-2221.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- STUDENTS.VAL 15:52:53.298766
-
-2221.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- COURSES.DB 15:39:11.776780
-
-2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:10.251764
-
-2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
- BASEMACH.DOC 15:39:09.336572 </programlisting>This command will initiate a dd for the specified count and block size and measures the
-corresponding throughput.
-</para>
- <para><emphasis role="bold">To view list of read performance on each brick</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of read performance on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command>
-</para>
- <para>For example, to view read performance on brick server:/export/ of test-volume, 256 block size
-of count 1, and list count 10:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> read-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick: server:/export/dir1 256 bytes (256 B) copied, Throughput: 4.1 MB/s </computeroutput></para>
- <programlisting> ==========Read throughput file stats========
-
-read filename Time
-through
-put(MBp
-s)
-
-2912.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- TRIDOTS.POT 15:38:36.896486
-
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- PCBENCHM.PPT 15:38:39.815310
-
-2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:52:53.631499
-
-2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.926198
-
-2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.930445
-
-2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- COURSES.X04 15:38:40.549919
-
-2221.00 /clients/client9/~dmtmp/PARADOX/ -2011-01-31
- STUDENTS.VAL 15:52:53.298766
-
-2221.00 /clients/client8/~dmtmp/PARADOX/ -2011-01-31
- COURSES.DB 15:39:11.776780
-
-2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:10.251764
-
-2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
- BASEMACH.DOC 15:39:09.336572
- </programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf">
- <title>Viewing List of Write Performance on each Brick </title>
- <para>You can view list of write throughput of files on each brick. If brick name is not specified, then the
-metrics of all the bricks belonging to that volume will be displayed. The output will be the write
-throughput.
-</para>
- <para>This command will initiate a dd for the specified count and block size and measures the
-corresponding throughput.
-To view list of write performance on each brick:
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of write performance on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view write performance on brick <replaceable>server:/export/</replaceable> of <replaceable>test-volume</replaceable>, 256 block size
-of count 1, and list count 10:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> write-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick</code>: <replaceable>server:/export/dir1</replaceable>
-</para>
- <para><code>256 bytes (256 B) copied, Throughput: 2.8 MB/s </code><programlisting> ==========Write throughput file stats========
-
-write filename Time
-throughput
-(MBps)
-
-1170.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- SMALL.FIL 15:39:09.171494
-
-1008.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:39:09.73189
-
-949.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.927426
-
-936.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.933177
-897.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:09.33628
-
-897.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:09.27713
-
-885.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- SMALL.FIL 15:38:36.924271
-
-528.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:39:09.81893
-
-516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31
- FASTENER.MDB 15:39:01.797317
-</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Information">
- <title>Displaying Volume Information </title>
- <para>You can display information about a specific volume, or all volumes, as needed.</para>
- <para><emphasis role="bold">To display volume information </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information about a specific volume using the following command:</para>
- <para><command># gluster volume info </command><varname>VOLNAME</varname></para>
- <para>For example, to display information about test-volume:</para>
- <para><programlisting># gluster volume info test-volume
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Display information about all volumes using the following command:</para>
- <para><command># gluster volume info all</command></para>
- <para><programlisting># gluster volume info all
-
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4
-
-Volume Name: mirror
-Type: Distributed-Replicate
-Status: Started
-Number of Bricks: 2 X 2 = 4
-Bricks:
-Brick1: server1:/brick1
-Brick2: server2:/brick2
-Brick3: server3:/brick3
-Brick4: server4:/brick4
-
-Volume Name: Vol
-Type: Distribute
-Status: Started
-Number of Bricks: 1
-Bricks:
-Brick: server:/brick6
-
-</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Performing_Statedump">
- <title>Performing Statedump on a Volume </title>
- <para>Statedump is a mechanism through which you can get details of all internal variables and state of the glusterfs process at the time of issuing the command. You can perform statedumps of the brick processes and nfs server process of a volume using the statedump command. The following options can be used to determine what information is to be dumped:</para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">mem</emphasis> - Dumps the memory usage and memory pool details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">iobuf</emphasis> - Dumps iobuf details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">priv</emphasis> - Dumps private information of loaded translators.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">callpool</emphasis> - Dumps the pending calls of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">fd</emphasis> - Dumps the open fd tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">inode</emphasis> - Dumps the inode tables of the volume.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To display volume statedump </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display statedump of a volume or NFS server using the following command:</para>
- <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> [nfs] [all|mem|iobuf|callpool|priv|fd|inode]</command></para>
- <para>For example, to display statedump of test-volume:</para>
- <para><programlisting># gluster volume statedump test-volume
-Volume statedump successful</programlisting></para>
- <para>The statedump files are created on the brick servers in the<filename> /tmp</filename> directory or in the directory set using <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para>
- </listitem>
- <listitem>
- <para>By default, the output of the statedump is stored at <filename> /tmp/&lt;brickname.PID.dump&gt;</filename> file on that particular server. Change the directory of the statedump file using the following command:</para>
- <para><command># gluster volume set <replaceable>VOLNAME</replaceable> server.statedump-path <replaceable>path</replaceable></command></para>
- <para>For example, to change the location of the statedump file of test-volume:</para>
- <para><programlisting># gluster volume set test-volume server.statedump-path /usr/local/var/log/glusterfs/dumps/
-Set volume successful</programlisting></para>
- <para>You can view the changed path of the statedump file using the following command:</para>
- <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Status">
- <title>Displaying Volume Status </title>
- <para>You can display the status information about a specific volume, brick or all volumes, as needed. Status information can be used to understand the current status of the brick, nfs processes, and overall file system. Status information can also be used to monitor and debug the volume information. You can view status of the volume along with the following details:</para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">detail</emphasis> - Displays additional information about the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">clients</emphasis> - Displays the list of clients connected to the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">mem</emphasis> - Displays the memory usage and memory pool details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">inode</emphasis> - Displays the inode tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">fd</emphasis> - Displays the open fd (file descriptors) tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">callpool</emphasis> - Displays the pending calls of the volume.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To display volume status </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information about a specific volume using the following command:</para>
- <para><command># gluster volume status [all|<replaceable>VOLNAME</replaceable> [<replaceable>BRICKNAME</replaceable>]] [detail|clients|mem|inode|fd|callpool]</command> </para>
- <para>For example, to display information about test-volume:</para>
- <para><programlisting># gluster volume status test-volume
-STATUS OF VOLUME: test-volume
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/1 24009 Y 22445
---------------------------------------------------------
-arch:/export/2 24010 Y 22450</programlisting></para>
- </listitem>
- <listitem>
- <para>Display information about all volumes using the following command:</para>
- <para><command># gluster volume status all</command>
-</para>
- <para><programlisting># gluster volume status all
-STATUS OF VOLUME: volume-test
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/4 24010 Y 22455
-
-STATUS OF VOLUME: test-volume
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/1 24009 Y 22445
---------------------------------------------------------
-arch:/export/2 24010 Y 22450</programlisting></para>
- </listitem>
- <listitem>
- <para>Display additional information about the bricks using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> detail</command>
-</para>
- <para>For example, to display additional information about the bricks of test-volume:</para>
- <para><programlisting># gluster volume status test-volume detail
-STATUS OF VOLUME: test-volume
--------------------------------------------
-Brick : arch:/export/1
-Port : 24009
-Online : Y
-Pid : 16977
-File System : rootfs
-Device : rootfs
-Mount Options : rw
-Disk Space Free : 13.8GB
-Total Disk Space : 46.5GB
-Inode Size : N/A
-Inode Count : N/A
-Free Inodes : N/A
-
-Number of Bricks: 1
-Bricks:
-Brick: server:/brick6</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the list of clients accessing the volumes using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> clients</command>
-</para>
- <para>For example, to display the list of clients connected to test-volume:</para>
- <para><programlisting># gluster volume status test-volume clients
-Brick : arch:/export/1
-Clients connected : 2
-Hostname Bytes Read BytesWritten
--------- --------- ------------
-127.0.0.1:1013 776 676
-127.0.0.1:1012 50440 51200</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the memory usage and memory pool details of the bricks using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> mem</command>
-</para>
- <para>For example, to display the memory usage and memory pool details of the bricks of test-volume:</para>
- <screen>Memory status for volume : test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Mallinfo
---------
-Arena : 434176
-Ordblks : 2
-Smblks : 0
-Hblks : 12
-Hblkhd : 40861696
-Usmblks : 0
-Fsmblks : 0
-Uordblks : 332416
-Fordblks : 101760
-Keepcost : 100400
-
-Mempool Stats
--------------
-Name HotCount ColdCount PaddedSizeof AllocCount MaxAlloc
----- -------- --------- ------------ ---------- --------
-test-volume-server:fd_t 0 16384 92 57 5
-test-volume-server:dentry_t 59 965 84 59 59
-test-volume-server:inode_t 60 964 148 60 60
-test-volume-server:rpcsvc_request_t 0 525 6372 351 2
-glusterfs:struct saved_frame 0 4096 124 2 2
-glusterfs:struct rpc_req 0 4096 2236 2 2
-glusterfs:rpcsvc_request_t 1 524 6372 2 1
-glusterfs:call_stub_t 0 1024 1220 288 1
-glusterfs:call_stack_t 0 8192 2084 290 2
-glusterfs:call_frame_t 0 16384 172 1728 6</screen>
- </listitem>
- <listitem>
- <para>Display the inode tables of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> inode</command>
-</para>
- <para>For example, to display the inode tables of the test-volume:</para>
- <para><programlisting># gluster volume status test-volume inode
-inode tables for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Active inodes:
-GFID Lookups Ref IA type
----- ------- --- -------
-6f3fe173-e07a-4209-abb6-484091d75499 1 9 2
-370d35d7-657e-44dc-bac4-d6dd800ec3d3 1 1 2
-
-LRU inodes:
-GFID Lookups Ref IA type
----- ------- --- -------
-80f98abe-cdcf-4c1d-b917-ae564cf55763 1 0 1
-3a58973d-d549-4ea6-9977-9aa218f233de 1 0 1
-2ce0197d-87a9-451b-9094-9baa38121155 1 0 2</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the open fd tables of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> fd</command>
-</para>
- <para>For example, to display the open fd tables of the test-volume:</para>
- <para><screen># gluster volume status test-volume fd
-
-FD tables for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Connection 1:
-RefCount = 0 MaxFDs = 128 FirstFree = 4
-FD Entry PID RefCount Flags
--------- --- -------- -----
-0 26311 1 2
-1 26310 3 2
-2 26310 1 2
-3 26311 3 2
-
-Connection 2:
-RefCount = 0 MaxFDs = 128 FirstFree = 0
-No open fds
-
-Connection 3:
-RefCount = 0 MaxFDs = 128 FirstFree = 0
-No open fds</screen></para>
- </listitem>
- <listitem>
- <para>Display the pending calls of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> callpool</command>
-</para>
- <para>Each call has a call stack containing call frames.</para>
- <para>For example, to display the pending calls of test-volume:</para>
- <para><programlisting># gluster volume status test-volume callpool
-
-Pending calls for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Pending calls: 2
-Call Stack1
- UID : 0
- GID : 0
- PID : 26338
- Unique : 192138
- Frames : 7
- Frame 1
- Ref Count = 1
- Translator = test-volume-server
- Completed = No
- Frame 2
- Ref Count = 0
- Translator = test-volume-posix
- Completed = No
- Parent = test-volume-access-control
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 3
- Ref Count = 1
- Translator = test-volume-access-control
- Completed = No
- Parent = repl-locks
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 4
- Ref Count = 1
- Translator = test-volume-locks
- Completed = No
- Parent = test-volume-io-threads
- Wind From = iot_fsync_wrapper
- Wind To = FIRST_CHILD (this)-&gt;fops-&gt;fsync
- Frame 5
- Ref Count = 1
- Translator = test-volume-io-threads
- Completed = No
- Parent = test-volume-marker
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 6
- Ref Count = 1
- Translator = test-volume-marker
- Completed = No
- Parent = /export/1
- Wind From = io_stats_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 7
- Ref Count = 1
- Translator = /export/1
- Completed = No
- Parent = test-volume-server
- Wind From = server_fsync_resume
- Wind To = bound_xl-&gt;fops-&gt;fsync</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_setting_volumes.xml b/doc/admin-guide/en-US/admin_setting_volumes.xml
deleted file mode 100644
index 6a8468d5f..000000000
--- a/doc/admin-guide/en-US/admin_setting_volumes.xml
+++ /dev/null
@@ -1,325 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Setting_Volumes">
- <title>Setting up GlusterFS Server Volumes</title>
- <para>A volume is a logical collection of bricks where each brick is an export directory on a server in the trusted storage pool. Most of the gluster management operations are performed on the volume. </para>
- <para>To create a new volume in your storage environment, specify the bricks that comprise the volume. After you have created a new volume, you must start it before attempting to mount it. </para>
- <itemizedlist>
- <listitem>
- <para>Volumes of the following types can be created in your storage environment: </para>
- <itemizedlist>
- <listitem>
- <para>Distributed - Distributed volumes distribute files throughout the bricks in the volume. You can use distributed volumes where the requirement is to scale storage and the redundancy is either not important or is provided by other hardware/software layers. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed"/>.</para>
- </listitem>
- <listitem>
- <para>Replicated – Replicated volumes replicate files across bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Replicated"/>.</para>
- </listitem>
- <listitem>
- <para>Striped – Striped volumes stripe data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped"/>.</para>
- </listitem>
- <listitem>
- <para>Distributed Striped - Distributed striped volumes stripe data across two or more nodes in the cluster. You should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped"/>.</para>
- </listitem>
- <listitem>
- <para>Distributed Replicated - Distributed replicated volumes distribute files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated"/>. </para>
- </listitem>
- <listitem>
- <para>Distributed Striped Replicated – Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated"/>.
-</para>
- </listitem>
- <listitem>
- <para>Striped Replicated – Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more
-information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped_Replicated"/>.</para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To create a new volume </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Create a new volume :</para>
- <para><command># gluster volume create<replaceable> NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable> | replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp, rdma] <replaceable>NEW-BRICK1 NEW-BRICK2 NEW-BRICK3...</replaceable></command></para>
- <para>For example, to create a volume called test-volume consisting of server3:/exp3 and server4:/exp4:</para>
- <para><programlisting># gluster volume create test-volume server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed">
- <title>Creating Distributed Volumes</title>
- <para>In a distributed volume, files are spread randomly across the bricks in the volume. Use distributed volumes where you need to scale storage and redundancy is either not important or is provided by other hardware/software layers. </para>
- <para><note>
- <para>Disk/server failure in distributed volumes can result in a serious loss of data because directory contents are spread randomly across the bricks in the volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed volume</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed volume with four storage servers using tcp:</para>
- <para><programlisting># gluster volume create test-volume server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>(Optional) You can display the volume information:</para>
- <para><programlisting># gluster volume info
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Transport-type: tcp
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- <para>For example, to create a distributed volume with four storage servers over InfiniBand:</para>
- <para><programlisting># gluster volume create test-volume transport rdma server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Replicated">
- <title>Creating Replicated Volumes </title>
- <para>Replicated volumes create copies of files across multiple bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. </para>
- <para><note>
- <para>The number of bricks should be equal to the replica count for a replicated volume.
-To protect against server and disk failures, it is recommended that the bricks of the volume are from different servers. </para>
- </note></para>
- <figure>
- <title>Illustration of a Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a replicated volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the replicated volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a replicated volume with two storage servers:</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Striped">
- <title>Creating Striped Volumes</title>
- <para>Striped volumes stripe data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files.</para>
- <para><note>
- <para>The number of bricks should be equal to the stripe count for a striped volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Striped Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Striped_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a striped volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the striped volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a striped volume across two storage servers:</para>
- <para><programlisting># gluster volume create test-volume stripe 2 transport tcp server1:/exp1 server2:/exp2
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped">
- <title>Creating Distributed Striped Volumes </title>
- <para>Distributed striped volumes stripe files across two or more nodes in the cluster. For best results, you should use distributed striped volumes where the requirement is to scale storage and where access to very large files in high concurrency environments is critical.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the stripe count for a distributed striped volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Striped Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Striped_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed striped volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed striped volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed striped volume across eight storage servers:</para>
- <para><programlisting># gluster volume create test-volume stripe 4 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated">
- <title>Creating Distributed Replicated Volumes </title>
- <para>Distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the replica count for a distributed replicated volume. Also, the order in which bricks are specified has a great effect on data protection. Each replica_count consecutive bricks in the list you give will form a replica set, with all replica sets combined into a volume-wide distribute set. To make sure that replica-set members are not placed on the same node, list the first brick on every server, then the second brick on every server in the same order, and so on. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed replicated volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed replicated volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a four node distributed (replicated) volume with a two-way mirror:
-</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>For example, to create a six node distributed (replicated) volume with a two-way mirror:</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated">
- <title>Creating Distributed Striped Replicated Volumes </title>
- <para>Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. </para>
- <para><note>
- <para>The number of bricks should be a multiple of the stripe count and replica count for
-a distributed striped replicated volume.
- </para>
- </note></para>
- <para><emphasis role="bold">To create a distributed striped replicated volume</emphasis>
-</para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create a distributed striped replicated volume using the following command:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed replicated striped volume across eight storage servers:
-</para>
- <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Striped_Replicated">
- <title>Creating Striped Replicated Volumes </title>
- <para>Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the replicate count and stripe count for a
-striped replicated volume.
-</para>
- </note></para>
- <figure>
- <title>Illustration of a Striped Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Striped_Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a striped replicated volume</emphasis>
-</para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool consisting of the storage servers that will comprise the volume.</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create a striped replicated volume :</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a striped replicated volume across four storage servers:
-
-</para>
- <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>To create a striped replicated volume across six storage servers:
-</para>
- <para><programlisting># gluster volume create test-volume stripe 3 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Starting">
- <title>Starting Volumes </title>
- <para>You must start your volumes before you try to mount them. </para>
- <para><emphasis role="bold">To start a volume </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start a volume:</para>
- <para><command># gluster volume start <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to start test-volume:</para>
- <para><programlisting># gluster volume start test-volume
-Starting test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_settingup_clients.xml b/doc/admin-guide/en-US/admin_settingup_clients.xml
deleted file mode 100644
index 22979acf4..000000000
--- a/doc/admin-guide/en-US/admin_settingup_clients.xml
+++ /dev/null
@@ -1,511 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-GlusterFS_Client">
- <title>Accessing Data - Setting Up GlusterFS Client</title>
- <para>You can access gluster volumes in multiple ways. You can use the Gluster Native Client method for high concurrency, performance, and transparent failover in GNU/Linux clients. You can also use NFS v3 to access gluster volumes. Extensive testing has been done on GNU/Linux clients and on NFS implementations in other operating systems, such as FreeBSD and Mac OS X, as well as Windows 7 (Professional and Up) and Windows Server 2003. Other NFS client implementations may work with the gluster NFS server.</para>
- <para>You can use CIFS to access volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. </para>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native">
- <title>Gluster Native Client</title>
- <para>The Gluster Native Client is a FUSE-based client running in user space. Gluster Native Client is the recommended method for accessing volumes when high concurrency and high write performance is required.</para>
- <para>This section introduces the Gluster Native Client and explains how to install the software on client machines. This section also describes how to mount volumes on clients (both manually and automatically) and how to verify that the volume has mounted successfully. </para>
- <section>
- <title>Installing the Gluster Native Client</title>
- <para>Before you begin installing the Gluster Native Client, you need to verify that the FUSE module is loaded on the client and has access to the required modules as follows: </para>
- <orderedlist>
- <listitem>
- <para>Add the FUSE loadable kernel module (LKM) to the Linux kernel:</para>
- <para><command># modprobe fuse</command></para>
- </listitem>
- <listitem>
- <para>Verify that the FUSE module is loaded:</para>
- <para><command># dmesg | grep -i fuse </command></para>
- <para><command>fuse init (API version 7.13)</command></para>
- </listitem>
- </orderedlist>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-RPM">
- <title>Installing on Red Hat Package Manager (RPM) Distributions </title>
- <para>To install Gluster Native Client on RPM distribution-based systems</para>
- <orderedlist>
- <listitem>
- <para>Install required prerequisites on the client using the following command:</para>
- <para><command>$ sudo yum -y install openssh-server wget fuse fuse-libs openib libibverbs</command></para>
- </listitem>
- <listitem>
- <para>Ensure that TCP and UDP ports 24007 and 24008 are open on all Gluster servers. Apart from these ports, you need to open one port for each brick starting from port 24009. For example: if you have five bricks, you need to have ports 24009 to 24013 open.</para>
- <para>You can use the following chains with iptables:</para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT </code></para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT</code></para>
- <para><note>
- <para>If you already have iptable chains, make sure that the above ACCEPT rules precede the DROP rules. This can be achieved by providing a lower rule number than the DROP rule.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>Download the latest glusterfs, glusterfs-fuse, and glusterfs-rdma RPM files to each client. The glusterfs package contains the Gluster Native Client. The glusterfs-fuse package contains the FUSE translator required for mounting on client systems and the glusterfs-rdma packages contain OpenFabrics verbs RDMA module for Infiniband.</para>
- <para>You can download the software at <ulink url="http://bits.gluster.com/gluster/glusterfs/3.3.0qa30/x86_64/"/>.</para>
- </listitem>
- <listitem>
- <para>Install Gluster Native Client on the client.</para>
- <para><command>$ sudo rpm -i glusterfs-3.3.0qa30-1.x86_64.rpm </command></para>
- <para><command>$ sudo rpm -i glusterfs-fuse-3.3.0qa30-1.x86_64.rpm </command></para>
- <para><command>$ sudo rpm -i glusterfs-rdma-3.3.0qa30-1.x86_64.rpm</command></para>
- <para><note>
- <para>The RDMA module is only required when using Infiniband.</para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section condition="gfs">
- <title>Installing on Debian-based Distributions</title>
- <para>To install Gluster Native Client on Debian-based distributions</para>
- <orderedlist>
- <listitem>
- <para>Install OpenSSH Server on each client using the following command:</para>
- <para><command>$ sudo apt-get install openssh-server vim wget</command></para>
- </listitem>
- <listitem>
- <para>Download the latest GlusterFS .deb file and checksum to each client.</para>
- <para>You can download the software at <ulink url="http://www.gluster.org/download/"/>.</para>
- </listitem>
- <listitem>
- <para>For each .deb file, get the checksum (using the following command) and compare it against the checksum for that file in the md5sum file.</para>
- <para>
-<command>$ md5sum GlusterFS_DEB_file.deb </command></para>
- <para>The md5sum of the packages is available at: <ulink url="http://download.gluster.com/pub/gluster/glusterfs"/></para>
- </listitem>
- <listitem>
- <para>Uninstall GlusterFS v3.1 (or an earlier version) from the client using the following command:
-</para>
- <para><command>$ sudo dpkg -r glusterfs </command></para>
- <para>(Optional) Run <command>$ sudo dpkg --purge glusterfs </command>to purge the configuration files.</para>
- </listitem>
- <listitem>
- <para>Install Gluster Native Client on the client using the following command:
-</para>
- <para><command>$ sudo dpkg -i GlusterFS_DEB_file </command></para>
- <para>For example:
-</para>
- <para><command>$ sudo dpkg -i glusterfs-3.3.x.deb </command></para>
- </listitem>
- <listitem>
- <para>Ensure that TCP and UDP ports 24007 and 24008 are open on all Gluster servers. Apart from these ports, you need to open one port for each brick starting from port 24009. For example: if you have five bricks, you need to have ports 24009 to 24013 open.
-</para>
- <para>You can use the following chains with iptables:
-</para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT </code></para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT</code></para>
- <para><note>
- <para>If you already have iptable chains, make sure that the above ACCEPT rules precede the DROP rules. This can be achieved by providing a lower rule number than the DROP rule.</para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Performing a Source Installation</title>
- <para>To build and install Gluster Native Client from the source code</para>
- <orderedlist>
- <listitem>
- <para>Create a new directory using the following commands:</para>
- <para><command># mkdir glusterfs </command></para>
- <para><command># cd glusterfs</command></para>
- </listitem>
- <listitem>
- <para>Download the source code.
-</para>
- <para>You can download the source at <ulink url="http://www.gluster.org/download/"/>.</para>
- </listitem>
- <listitem>
- <para>Extract the source code using the following command:
-</para>
- <para><command># tar -xvzf SOURCE-FILE </command></para>
- </listitem>
- <listitem>
- <para>Run the configuration utility using the following command:
-</para>
- <para><code># ./configure </code></para>
- <para><code>GlusterFS configure summary </code></para>
- <para><code>================== </code></para>
- <para><code>FUSE client : yes </code></para>
- <para><code>Infiniband verbs : yes </code></para>
- <para><code>epoll IO multiplex : yes </code></para>
- <para><code>argp-standalone : no </code></para>
- <para><code>fusermount : no </code></para>
- <para><code>readline : yes</code></para>
- <para>The configuration summary shows the components that will be built with Gluster Native Client.</para>
- </listitem>
- <listitem>
- <para>Build the Gluster Native Client software using the following commands:
-</para>
- <para><command># make </command></para>
- <para><command># make install</command></para>
- </listitem>
- <listitem>
- <para>Verify that the correct version of Gluster Native Client is installed, using the following command:
-</para>
- <para><command># glusterfs --version</command></para>
- </listitem>
- </orderedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Mounting_Volumes">
- <title>Mounting Volumes</title>
- <para>After installing the Gluster Native Client, you need to mount Gluster volumes to access data. There are two methods you can choose: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Manuall"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Automatic"/></para>
- </listitem>
- </itemizedlist>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-Testing"/>. </para>
- <para><note>
- <para>Server names selected during creation of Volumes should be resolvable in the client machine. You can use appropriate /etc/hosts entries or DNS server to resolve server names to IP addresses. </para>
- </note></para>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Manuall">
- <title>Manually Mounting Volumes</title>
- <para>To manually mount a Gluster volume </para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, use the following command:
-</para>
- <para><command># mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -t glusterfs server1:/test-volume /mnt/glusterfs</command></para>
- <note>
- <para>The server specified in the mount command is only used to fetch the gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount).
-
-</para>
- <para>If you see a usage message like &quot;Usage: mount.glusterfs&quot;, mount usually requires you to create a directory to be used as the mount point. Run &quot;mkdir /mnt/glusterfs&quot; before you attempt to run the mount command listed above.</para>
- </note>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Mounting Options</emphasis></para>
- <para>You can specify the following options when using the <command>mount -t glusterfs</command> command. Note that you need to separate all options with commas.
-
-</para>
- <para>backupvolfile-server=server-name</para>
- <para>volfile-max-fetch-attempts=number of attempts</para>
- <para>log-level=loglevel
-</para>
- <para>log-file=logfile
-</para>
- <para>transport=transport-type
-</para>
- <para>direct-io-mode=[enable|disable]
-
-</para>
- <para>For example:
-</para>
- <para><code># mount -t glusterfs -o backupvolfile-server=volfile_server2,volfile-max-fetch-attempts=2,log-level=WARNING,log-file=/var/log/gluster.log server1:/test-volume /mnt/glusterfs</code></para>
- <para>If <option>backupvolfile-server</option> option is added while mounting fuse client, when the first
-volfile server fails, then the server specified in <option>backupvolfile-server</option> option is used as volfile server to mount
-the client.</para>
- <para>In the <code>--volfile-max-fetch-attempts=X</code> option, specify the number of attempts to fetch volume files while mounting a volume. This option is useful when you mount a server with multiple IP addresses or when round-robin DNS is configured for the server-name. </para>
- </section>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Automatic" dir="lro">
- <title>Automatically Mounting Volumes</title>
- <para>You can configure your system to automatically mount the Gluster volume each time your system starts. </para>
- <para>The server specified in the mount command is only used to fetch the gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount). </para>
- <para><emphasis role="bold">To automatically mount a Gluster volume</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, edit the /etc/fstab file and add the following line:
-</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev 0 0 </command></para>
- <para>For example:
-</para>
- <para><code>server1:/test-volume /mnt/glusterfs glusterfs defaults,_netdev 0 0</code></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Mounting Options </emphasis></para>
- <para>You can specify the following options when updating the /etc/fstab file. Note that you need to separate all options with commas.
-
-</para>
- <para>log-level=loglevel
-</para>
- <para>log-file=logfile
-</para>
- <para>transport=transport-type
-</para>
- <para>direct-io-mode=[enable|disable]
-
-</para>
- <para>For example:
-</para>
- <para><code>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev,log-level=WARNING,log-file=/var/log/gluster.log 0 0 </code></para>
- </section>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-Testing">
- <title>Testing Mounted Volumes</title>
- <para>To test mounted volumes</para>
- <itemizedlist>
- <listitem>
- <para>Use the following command:
-</para>
- <para><command># mount </command></para>
- <para>If the gluster volume was successfully mounted, the output of the mount command on the client will be similar to this example:
-
-</para>
- <para><code>server1:/test-volume on /mnt/glusterfs type fuse.glusterfs (rw,allow_other,default_permissions,max_read=131072)</code></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Use the following command:
-</para>
- <para><command># df</command>
-</para>
- <para>The output of df command on the client will display the aggregated storage space from all the bricks in a volume similar to this example:
-</para>
- <para><code># df -h /mnt/glusterfs Filesystem Size Used Avail Use% Mounted on server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs</code></para>
- </listitem>
- <listitem>
- <para>Change to the directory and list the contents by entering the following:
-</para>
- <para><command># cd MOUNTDIR </command></para>
- <para><command># ls</command></para>
- </listitem>
- <listitem>
- <para>For example,</para>
- <para><code># cd /mnt/glusterfs </code></para>
- <para><code># ls</code></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS">
- <title>NFS</title>
- <para>You can use NFS v3 to access gluster volumes. Extensive testing has been done on GNU/Linux clients and on NFS implementations in other operating systems, such as FreeBSD and Mac OS X, as well as Windows 7 (Professional and Up) and Windows Server 2003. Other NFS client implementations may work with the gluster NFS server implementation. </para>
- <para>GlusterFS now includes network lock manager (NLM) v4. NLM enables applications on NFSv3 clients to do record locking on files on NFS server. It is started automatically whenever the NFS server is run.</para>
- <para condition="gfs">You must install nfs-common package on both servers and clients (only for Debian-based) distribution.</para>
- <para>This section describes how to use NFS to mount Gluster volumes (both manually and automatically) and how to verify that the volume has been mounted successfully. </para>
- <section>
- <title>Using NFS to Mount Volumes</title>
- <para>You can use either of the following methods to mount Gluster volumes: </para>
- <para><itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Manual"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic"/></para>
- </listitem>
- </itemizedlist></para>
- <para condition="gfs"><emphasis role="bold">Prerequisite</emphasis>: Install nfs-common package on both servers and clients (only for Debian-based distribution), using the following command: </para>
- <para condition="gfs"><command>$ sudo aptitude install nfs-common </command></para>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Testing"/>. </para>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Manual">
- <title>Manually Mounting Volumes Using NFS </title>
- <para>To manually mount a Gluster volume using NFS </para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, use the following command:
-</para>
- <para><command># mount -t nfs -o vers=3 HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command>
-</para>
- <para>For example:</para>
- <para><command># mount -t nfs -o vers=3 server1:/test-volume /mnt/glusterfs</command></para>
- <para><note>
- <para> Gluster NFS server does not support UDP. If the NFS client you are using defaults to connecting using UDP, the following message appears:
-</para>
- <para><code>requested NFS version or transport protocol is not supported</code>. </para>
- </note></para>
- <para><emphasis role="bold">To connect using TCP</emphasis></para>
- </listitem>
- <listitem>
- <para>Add the following option to the mount command:
-</para>
- <para><command>-o mountproto=tcp </command></para>
- <para>For example:
-</para>
- <para><command># mount -o mountproto=tcp -t nfs server1:/test-volume /mnt/glusterfs</command></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To mount Gluster NFS server from a Solaris client </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Use the following command:
-</para>
- <para><command># mount -o proto=tcp,vers=3 nfs://HOSTNAME-OR-IPADDRESS:38467/VOLNAME MOUNTDIR</command></para>
- <para>
-For example:</para>
- <para><command> # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs</command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic">
- <title>Automatically Mounting Volumes Using NFS</title>
- <para>You can configure your system to automatically mount Gluster volumes using NFS each time the system starts.</para>
- <para><emphasis role="bold">To automatically mount a Gluster volume using NFS </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, edit the /etc/fstab file and add the following line:</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,vers=3 0 0</command></para>
- <para>For example,</para>
- <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,vers=3 0 0</command></para>
- <note>
- <para>Gluster NFS server does not support UDP. If the NFS client you are using defaults to connecting using UDP, the following message appears: </para>
- <para><command>requested NFS version or transport protocol is not supported.</command></para>
- </note>
- <para/>
- <para>To connect using TCP </para>
- </listitem>
- <listitem>
- <para>Add the following entry in /etc/fstab file :</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,mountproto=tcp 0 0</command></para>
- <para>For example,</para>
- <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,mountproto=tcp 0 0</command></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To automount NFS mounts</emphasis></para>
- <para>Gluster supports *nix standard method of automounting NFS mounts. Update the /etc/auto.master and /etc/auto.misc and restart the autofs service. After that, whenever a user or process attempts to access the directory it will be mounted in the background. </para>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Testing">
- <title>Testing Volumes Mounted Using NFS</title>
- <para>You can confirm that Gluster directories are mounting successfully. </para>
- <para><emphasis role="bold">To test mounted volumes</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Use the mount command by entering the following:</para>
- <para><command># mount</command></para>
- <para>For example, the output of the mount command on the client will display an entry like the following:</para>
- <para><command>server1:/test-volume on /mnt/glusterfs type nfs (rw,vers=3,addr=server1)</command></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Use the df command by entering the following:</para>
- <para><command># df</command></para>
- <para>For example, the output of df command on the client will display the aggregated storage space from all the bricks in a volume.</para>
- <para><screen># df -h /mnt/glusterfs
-Filesystem Size Used Avail Use% Mounted on
-server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs</screen></para>
- </listitem>
- <listitem>
- <para>Change to the directory and list the contents by entering the following:</para>
- <para><command># cd MOUNTDIR</command></para>
- <para><command># ls</command></para>
- <para>For example,</para>
- <para><command>
- <command># cd /mnt/glusterfs</command>
- </command></para>
- <para><command># ls</command></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS">
- <title>CIFS</title>
- <para>You can use CIFS to access volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. You can export a glusterfs mount point as a Samba export, and then mount it using the CIFS protocol.</para>
- <para>This section describes how to mount CIFS shares on Microsoft Windows-based clients (both manually and automatically) and how to verify that the volume has mounted successfully.</para>
- <para><note>
- <para> CIFS access using the Mac OS X Finder is not supported, however, you can use the Mac OS X command line to access Gluster volumes using CIFS.</para>
- </note></para>
- <section>
- <title>Using CIFS to Mount Volumes</title>
- <para>You can use either of the following methods to mount Gluster volumes: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic"/></para>
- </listitem>
- </itemizedlist>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Testing"/>.</para>
- <para>You can also use Samba for exporting Gluster Volumes through CIFS protocol.</para>
- <section>
- <title>Exporting Gluster Volumes Through Samba</title>
- <para>We recommend you to use Samba for exporting Gluster volumes through the CIFS protocol. </para>
- <para><emphasis role="bold">To export volumes through CIFS protocol </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Mount a Gluster volume. For more information on mounting volumes, see <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Mounting_Volumes"/>.</para>
- </listitem>
- <listitem>
- <para>Setup Samba configuration to export the mount point of the Gluster volume.</para>
- <para>For example, if a Gluster volume is mounted on /mnt/gluster, you must edit smb.conf file to enable exporting this through CIFS. Open smb.conf file in an editor and add the following lines for a simple configuration:</para>
- <para>[glustertest]
- </para>
- <para> comment = For testing a Gluster volume exported through CIFS
- </para>
- <para> path = /mnt/glusterfs
- </para>
- <para> read only = no
- </para>
- <para> guest ok = yes</para>
- </listitem>
- </orderedlist>
- <para>Save the changes and start the smb service using your systems init scripts (/etc/init.d/smb [re]start).</para>
- <para><note>
- <para>To be able to mount from any server in the trusted storage pool, you must repeat these steps on each Gluster node. For more advanced configurations, see the Samba documentation. </para>
- </note></para>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual">
- <title>Manually Mounting Volumes Using CIFS </title>
- <para>You can manually mount Gluster volumes using CIFS on Microsoft Windows-based client machines. </para>
- <para><emphasis role="bold">To manually mount a Gluster volume using CIFS </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Using Windows Explorer, choose <emphasis role="bold">Tools &gt; Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para>
- </listitem>
- <listitem>
- <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Finish.</emphasis></para>
- </listitem>
- </orderedlist>
- <para>The network drive (mapped to the volume) appears in the Computer window.</para>
- <para><emphasis role="bold">Alternatively, to manually mount a Gluster volume using CIFS.</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Click <emphasis role="bold">Start &gt; Run</emphasis> and enter the following:</para>
- <para><command>
- <code>\\SERVERNAME\VOLNAME</code>
- </command></para>
- <para>For example:</para>
- <para><command>
- <code>\\server1\test-volume</code>
- </command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic">
- <title>Automatically Mounting Volumes Using CIFS</title>
- <para>You can configure your system to automatically mount Gluster volumes using CIFS on Microsoft Windows-based clients each time the system starts.</para>
- <para><emphasis role="bold">To automatically mount a Gluster volume using CIFS</emphasis></para>
- <para>The network drive (mapped to the volume) appears in the Computer window and is reconnected each time the system starts.</para>
- <orderedlist>
- <listitem>
- <para>Using Windows Explorer, choose <emphasis role="bold">Tools &gt; Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para>
- </listitem>
- <listitem>
- <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. </para>
- </listitem>
- <listitem>
- <para>Click the <emphasis role="bold">Reconnect at logon</emphasis> checkbox.</para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Finish.</emphasis></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Testing">
- <title>Testing Volumes Mounted Using CIFS</title>
- <para>You can confirm that Gluster directories are mounting successfully by navigating to the directory using Windows Explorer. </para>
- </section>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_start_stop_daemon.xml b/doc/admin-guide/en-US/admin_start_stop_daemon.xml
deleted file mode 100644
index bdab0b8b6..000000000
--- a/doc/admin-guide/en-US/admin_start_stop_daemon.xml
+++ /dev/null
@@ -1,56 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Start_Stop_Daemon">
- <title id="chap-Administration_Guide-Stop_Start_Daemon">Managing the glusterd Service</title>
- <para>After installing GlusterFS, you must start glusterd service. The glusterd service serves as the Gluster elastic volume manager, overseeing glusterfs processes, and co-ordinating dynamic volume operations, such as adding and removing volumes across multiple storage servers non-disruptively.</para>
- <para>This section describes how to start the glusterd service in the following ways: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Manually"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Automatically"/></para>
- </listitem>
- </itemizedlist>
- <note>
- <para>You must start glusterd on all GlusterFS servers.</para>
- </note>
- <section id="sect-Administration_Guide-Start_Stop_Daemon-Manually">
- <title>Starting and Stopping glusterd Manually</title>
- <para>This section describes how to start and stop glusterd manually</para>
- <itemizedlist>
- <listitem>
- <para>To start glusterd manually, enter the following command:</para>
- <para><command># /etc/init.d/glusterd start </command></para>
- </listitem>
- <listitem>
- <para>To stop glusterd manually, enter the following command: </para>
- <para><command># /etc/init.d/glusterd stop</command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Start_Stop_Daemon-Automatically">
- <title>Starting glusterd Automatically</title>
- <para condition="gfs">This section describes how to configure the system to automatically start the glusterd service every time the system boots. </para>
- <para condition="appliance">To automatically start the glusterd service every time the system boots, enter the following from the command line: </para>
- <para condition="appliance"><command># chkconfig glusterd on </command></para>
- <section condition="gfs">
- <title condition="gfs">Red Hat-based Systems</title>
- <para>To configure Red Hat-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line: </para>
- <para><command># chkconfig glusterd on </command></para>
- </section>
- <section condition="gfs">
- <title condition="gfs">Debian-based Systems</title>
- <para>To configure Debian-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line:</para>
- <para><command># update-rc.d glusterd defaults</command></para>
- </section>
- <section condition="gfs">
- <title condition="gfs">Systems Other than Red Hat and Debian</title>
- <para>To configure systems other than Red Hat or Debian to automatically start the glusterd service every time the system boots, enter the following entry to the<emphasis role="italic"> /etc/rc.local</emphasis> file: </para>
- <para><command># echo &quot;glusterd&quot; &gt;&gt; /etc/rc.local </command></para>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_storage_pools.xml b/doc/admin-guide/en-US/admin_storage_pools.xml
deleted file mode 100644
index 87b6320bd..000000000
--- a/doc/admin-guide/en-US/admin_storage_pools.xml
+++ /dev/null
@@ -1,57 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Storage-pool">
- <title>Setting up Trusted Storage Pools</title>
- <para>Before you can configure a GlusterFS volume, you must create a trusted storage pool consisting of the storage servers that provide bricks to a volume. </para>
- <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone. To add additional storage servers to the storage pool, you can use the probe command from a storage server that is already trusted. </para>
- <para><note>
- <para>Do not self-probe the first server/localhost.</para>
- </note></para>
- <para>The GlusterFS service must be running on all storage servers that you want to add to the storage pool. See <xref linkend="chap-Administration_Guide-Start_Stop_Daemon"/> for more information.</para>
- <section id="sect-Administration_Guide-Storage_Pools-Adding_Servers">
- <title>Adding Servers to Trusted Storage Pool</title>
- <para>To create a trusted storage pool, add servers to the trusted storage pool</para>
- <orderedlist>
- <listitem>
- <para>The hostnames used to create the storage pool must be resolvable by DNS.</para>
- <para>To add a server to the storage pool:</para>
- <para><command># gluster peer probe <replaceable>server</replaceable></command></para>
- <para>For example, to create a trusted storage pool of four servers, add three servers to the storage pool from server1:</para>
- <para><programlisting># gluster peer probe server2
-Probe successful
-
-# gluster peer probe server3
-Probe successful
-
-# gluster peer probe server4
-Probe successful
-</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the peer status from the first server using the following commands:</para>
- <para><programlisting># gluster peer status
-Number of Peers: 3
-
-Hostname: server2
-Uuid: 5e987bda-16dd-43c2-835b-08b7d55e94e5
-State: Peer in Cluster (Connected)
-
-Hostname: server3
-Uuid: 1e0ca3aa-9ef7-4f66-8f15-cbc348f29ff7
-State: Peer in Cluster (Connected)
-
-Hostname: server4
-Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7
-State: Peer in Cluster (Connected)</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Removing Servers from the Trusted Storage Pool</title>
- <para>To remove a server from the storage pool:</para>
- <para><command># gluster peer detach<replaceable> server</replaceable></command></para>
- <para> For example, to remove server4 from the trusted storage pool:</para>
- <para><programlisting># gluster peer detach server4
-Detach successful</programlisting></para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_troubleshooting.xml b/doc/admin-guide/en-US/admin_troubleshooting.xml
deleted file mode 100644
index dff182c5f..000000000
--- a/doc/admin-guide/en-US/admin_troubleshooting.xml
+++ /dev/null
@@ -1,509 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Troubleshooting">
- <title>Troubleshooting GlusterFS </title>
- <para>This section describes how to manage GlusterFS logs and most common troubleshooting scenarios
-related to GlusterFS.
-</para>
- <section>
- <title>Managing GlusterFS Logs </title>
- <para>This section describes how to manage GlusterFS logs by performing the following operation:
-
-</para>
- <itemizedlist>
- <listitem>
- <para>Rotating Logs
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Rotating Logs </title>
- <para>Administrators can rotate the log file in a volume, as needed.
-</para>
- <para><emphasis role="bold">To rotate a log file </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Rotate the log file using the following command:
-</para>
- <para><command># gluster volume log rotate <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to rotate the log file on test-volume:
-</para>
- <programlisting># gluster volume log rotate test-volume
-log rotate successful
-</programlisting>
- <note>
- <para>When a log file is rotated, the contents of the current log file are moved to
-log-file-name.epoch-time-stamp.
-</para>
- </note>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Troubleshooting Geo-replication </title>
- <para>This section describes the most common troubleshooting scenarios related to GlusterFS Geo-replication.
-</para>
- <section>
- <title>Locating Log Files </title>
- <para>For every Geo-replication session, the following three log files are associated to it (four, if the slave is a
-gluster volume):
-</para>
- <itemizedlist>
- <listitem>
- <para>Master-log-file - log file for the process which monitors the Master volume
-</para>
- </listitem>
- <listitem>
- <para>Slave-log-file - log file for process which initiates the changes in slave
-</para>
- </listitem>
- <listitem>
- <para>Master-gluster-log-file - log file for the maintenance mount point that Geo-replication module
-uses to monitor the master volume
-</para>
- </listitem>
- <listitem>
- <para>Slave-gluster-log-file - is the slave&apos;s counterpart of it
-</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Master Log File</emphasis>
-</para>
- <para>To get the Master-log-file for geo-replication, use the following command:
-</para>
- <para><command>gluster volume geo-replication <code>MASTER SLAVE</code> config log-file</command>
-</para>
- <para>For example:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config log-file </command></para>
- <para><emphasis role="bold">Slave Log File </emphasis></para>
- <para>To get the log file for Geo-replication on slave (glusterd must be running on slave machine), use the
-following commands:
-</para>
- <orderedlist>
- <listitem>
- <para>On master, run the following command:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config session-owner 5f6e5200-756f-11e0-a1f0-0800200c9a66 </command></para>
- <para>Displays the session owner details.
-</para>
- </listitem>
- <listitem>
- <para>On slave, run the following command:
-</para>
- <para><command># gluster volume geo-replication /data/remote_dir config log-file /var/log/gluster/${session-owner}:remote-mirror.log </command></para>
- </listitem>
- <listitem>
- <para>Substitute the session owner details (output of Step 1) into the output of Step 2 to get the
-location of the log file.
-</para>
- <para><command>/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log</command>
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Rotating Geo-replication Logs</title>
- <para>Administrators can rotate the log file of a particular master-slave session, as needed.
-When you run geo-replication&apos;s <command> log-rotate</command> command, the log file
-is backed up with the current timestamp suffixed to the file
-name and signal is sent to gsyncd to start logging to a new
-log file.</para>
- <para><emphasis role="bold">To rotate a geo-replication log file </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Rotate log file for a particular master-slave session using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>master slave</replaceable> log-rotate</command>
-</para>
- <para>For example, to rotate the log file of master <filename>Volume1</filename> and slave <filename>example.com:/data/remote_dir</filename>
-:
-</para>
- <programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir log-rotate
-log rotate successful</programlisting>
- </listitem>
- <listitem>
- <para>Rotate log file for all sessions for a master volume using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>master</replaceable> log-rotate</command>
-</para>
- <para>For example, to rotate the log file of master <filename>Volume1</filename>:
-</para>
- <programlisting># gluster volume geo-replication Volume1 log-rotate
-log rotate successful</programlisting>
- </listitem>
- <listitem>
- <para>Rotate log file for all sessions using the following command:
-</para>
- <para><command># gluster volume geo-replication log-rotate</command>
-</para>
- <para>For example, to rotate the log file for all sessions:</para>
- <programlisting># gluster volume geo-replication log-rotate
-log rotate successful</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Synchronization is not complete </title>
- <para><emphasis role="bold">Description</emphasis>: GlusterFS Geo-replication did not synchronize the data completely, but the
-geo-replication status displayed is still OK.
-</para>
- <para><emphasis role="bold">Solution</emphasis>: You can enforce a full sync of the data by erasing the index and restarting GlusterFS
-Geo-replication. After restarting, GlusterFS Geo-replication begins synchronizing all the data. All files are compared using checksum, which can be a lengthy and high resource utilization operation on large
-data sets. If the error situation persists, contact Red Hat Support.
-</para>
- <para>For more information about erasing index, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/>.
-</para>
- </section>
- <section>
- <title>Issues in Data Synchronization </title>
- <para><emphasis role="bold">Description</emphasis>: Geo-replication displays status as OK, but the files do not get synced; only
-directories and symlinks get synced, with the following error message in the log:
-</para>
- <para><errortext>[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to sync ./some_file </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: Geo-replication invokes rsync v3.0.0 or higher on the host and the remote machine. You must verify if
-you have installed the required version.
-</para>
- </section>
- <section>
- <title>Geo-replication status displays Faulty very often </title>
- <para><emphasis role="bold">Description</emphasis>: Geo-replication displays status as faulty very often with a backtrace similar to
-the following:
-</para>
- <para><errortext>[2011-04-28 14:06:18.378859] E [syncdutils:131:log_raise_exception] &lt;top&gt;: FAIL: Traceback (most recent call last): File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py&quot;, line 152, in twraptf(*aa) File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/repce.py&quot;, line 118, in listen rid, exc, res = recv(self.inf) File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/repce.py&quot;, line 42, in recv return pickle.load(inf) EOFError </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: This error indicates that the RPC communication between the master gsyncd module and slave
-gsyncd module is broken and this can happen for various reasons. Check if it satisfies all the following
-pre-requisites:
-</para>
- <itemizedlist>
- <listitem>
- <para>Password-less SSH is set up properly between the host and the remote machine.
-</para>
- </listitem>
- <listitem>
- <para>FUSE is installed on the machine, because the geo-replication module mounts the GlusterFS volume
-using FUSE to sync data.
-</para>
- </listitem>
- <listitem>
- <para>If the <emphasis role="bold">Slave</emphasis> is a volume, check if that volume is started.
-</para>
- </listitem>
- <listitem>
- <para>If the Slave is a plain directory, verify if the directory has been created already with the
-required permissions.
-</para>
- </listitem>
- <listitem>
- <para>If GlusterFS 3.2 or higher is not installed in the default location (in Master) and has been prefixed to be
-installed in a custom location, configure the <command>gluster-command</command> for it to point to the exact
-location.
-</para>
- </listitem>
- <listitem>
- <para>If GlusterFS 3.2 or higher is not installed in the default location (in slave) and has been prefixed to be
-installed in a custom location, configure the <command>remote-gsyncd-command</command> for it to point to the
-exact place where gsyncd is located.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Intermediate Master goes to Faulty State </title>
- <para><emphasis role="bold">Description</emphasis>: In a cascading set-up, the intermediate master goes to faulty state with the following
-log:
-</para>
- <para><errortext>raise RuntimeError (&quot;aborting on uuid change from %s to %s&quot; % \ RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f-4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154 </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: In a cascading set-up the Intermediate master is loyal to the original primary master. The
-above log means that the geo-replication module has detected change in primary master.
-If this is the desired behavior, delete the config option volume-id in the session initiated from the
-intermediate master.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting POSIX ACLs </title>
- <para>This section describes the most common troubleshooting issues related to POSIX ACLs.
-</para>
- <section>
- <title>setfacl command fails with “setfacl: &lt;file or directory name&gt;: Operation not supported” error </title>
- <para>You may face this error when the backend file systems in one of the servers is not mounted with
-the &quot;-o acl&quot; option. The same can be confirmed by viewing the following error message in the log file
-of the server &quot;Posix access control list is not supported&quot;.
-</para>
- <para><emphasis role="bold">Solution</emphasis>: Remount the backend file system with &quot;-o acl&quot; option. For more information, see <xref linkend="sect-Administration_Guide-ACLs-Activating_ACLs-Server"/>.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting Hadoop Compatible Storage </title>
- <para>This section describes the most common troubleshooting issues related to Hadoop Compatible
-Storage.
-
- </para>
- <section id="sect-Administration_Guide-Troubleshooting-Test_Section_1">
- <title>Time Sync</title>
- <para>Running MapReduce job may throw exceptions if the time is out-of-sync on the hosts in the cluster.
-
- </para>
- <para><emphasis role="bold">Solution</emphasis>: Sync the time on all hosts using ntpd program.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting NFS </title>
- <para>This section describes the most common troubleshooting issues related to NFS .
-</para>
- <section>
- <title>mount command on NFS client fails with “RPC Error: Program not registered” </title>
- <para>Start portmap or rpcbind service on the NFS server.
-</para>
- <para>This error is encountered when the server has not started correctly.
-</para>
- <para>On most Linux distributions this is fixed by starting portmap:
-</para>
- <para><command>$ /etc/init.d/portmap start</command>
-</para>
- <para>On some distributions where portmap has been replaced by rpcbind, the following command is
-required:
-</para>
- <para><command>$ /etc/init.d/rpcbind start </command></para>
- <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted.
-</para>
- </section>
- <section>
- <title>NFS server start-up fails with “Port is already in use” error in the log file. </title>
- <para>Another Gluster NFS server is running on the same machine.
-</para>
- <para>This error can arise in case there is already a Gluster NFS server running on the same machine.
-This situation can be confirmed from the log file, if the following error lines exist:
-</para>
- <para><screen>[2010-05-26 23:40:49] E [rpc-socket.c:126:rpcsvc_socket_listen] rpc-socket: binding socket failed:Address already in use
-[2010-05-26 23:40:49] E [rpc-socket.c:129:rpcsvc_socket_listen] rpc-socket: Port is already in use
-[2010-05-26 23:40:49] E [rpcsvc.c:2636:rpcsvc_stage_program_register] rpc-service: could not create listening connection
-[2010-05-26 23:40:49] E [rpcsvc.c:2675:rpcsvc_program_register] rpc-service: stage registration of program failed
-[2010-05-26 23:40:49] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
-[2010-05-26 23:40:49] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
-[2010-05-26 23:40:49] C [nfs.c:531:notify] nfs: Failed to initialize protocols</screen></para>
- <para>To resolve this error one of the Gluster NFS servers will have to be shutdown. At this time,
-Gluster NFS server does not support running multiple NFS servers on the same machine.
-</para>
- </section>
- <section>
- <title>mount command fails with “rpc.statd” related error message </title>
- <para>If the mount command fails with the following error message:
-</para>
- <para><errortext>mount.nfs: rpc.statd is not running but is required for remote locking. mount.nfs: Either use &apos;-o nolock&apos; to keep locks local, or start statd. </errortext></para>
- <para><errortext>Start rpc.statd </errortext></para>
- <para>For NFS clients to mount the NFS server, rpc.statd service must be running on the clients. </para>
- <para>Start
-rpc.statd service by running the following command:
-</para>
- <para><command>$ rpc.statd </command></para>
- </section>
- <section>
- <title>mount command takes too long to finish. </title>
- <para>Start rpcbind service on the NFS client.
-</para>
- <para>The problem is that the rpcbind or portmap service is not running on the NFS client. The
-resolution for this is to start either of these services by running the following command:
-</para>
- <para><command>$ /etc/init.d/portmap start</command>
-</para>
- <para>On some distributions where portmap has been replaced by rpcbind, the following command is
-required:
-</para>
- <para><command>$ /etc/init.d/rpcbind start</command></para>
- </section>
- <section>
- <title>NFS server glusterfsd starts but initialization fails with “nfsrpc-service: portmap registration of program failed” error message in the log. </title>
- <para>NFS start-up can succeed but the initialization of the NFS service can still fail preventing clients
-from accessing the mount points. Such a situation can be confirmed from the following error
-messages in the log file:
-</para>
- <para><screen>[2010-05-26 23:33:47] E [rpcsvc.c:2598:rpcsvc_program_register_portmap] rpc-service: Could not register with portmap
-[2010-05-26 23:33:47] E [rpcsvc.c:2682:rpcsvc_program_register] rpc-service: portmap registration of program failed
-[2010-05-26 23:33:47] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
-[2010-05-26 23:33:47] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
-[2010-05-26 23:33:47] C [nfs.c:531:notify] nfs: Failed to initialize protocols
-[2010-05-26 23:33:49] E [rpcsvc.c:2614:rpcsvc_program_unregister_portmap] rpc-service: Could not unregister with portmap
-[2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed
-[2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465</screen></para>
- <orderedlist>
- <listitem>
- <para>Start portmap or rpcbind service on the NFS server.
-</para>
- <para>On most Linux distributions, portmap can be started using the following command:
-</para>
- <para><command>$ /etc/init.d/portmap start </command></para>
- <para>On some distributions where portmap has been replaced by rpcbind, run the following command:
-</para>
- <para><command>$ /etc/init.d/rpcbind start </command></para>
- <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted.
-</para>
- </listitem>
- <listitem>
- <para>Stop another NFS server running on the same machine.
-</para>
- <para>Such an error is also seen when there is another NFS server running on the same machine but it is
-not the Gluster NFS server. On Linux systems, this could be the kernel NFS server. Resolution
-involves stopping the other NFS server or not running the Gluster NFS server on the machine.
-Before stopping the kernel NFS server, ensure that no critical service depends on access to that
-NFS server&apos;s exports.
-</para>
- <para>On Linux, kernel NFS servers can be stopped by using either of the following commands
-depending on the distribution in use:
-</para>
- <para><command>$ /etc/init.d/nfs-kernel-server stop</command>
-</para>
- <para><command>$ /etc/init.d/nfs stop</command></para>
- </listitem>
- <listitem>
- <para>Restart Gluster NFS server.
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>mount command fails with NFS server failed error. </title>
- <para>mount command fails with following error
-</para>
- <para><emphasis role="italic">mount: mount to NFS server &apos;10.1.10.11&apos; failed: timed out (retrying).</emphasis></para>
- <para>Perform one of the following to resolve this issue:
-</para>
- <orderedlist>
- <listitem>
- <para>Disable name lookup requests from NFS server to a DNS server.
-</para>
- <para>The NFS server attempts to authenticate NFS clients by performing a reverse DNS lookup to
-match hostnames in the volume file with the client IP addresses. There can be a situation where
-the NFS server either is not able to connect to the DNS server or the DNS server is taking too long
-to respond to DNS requests. These delays can result in delayed replies from the NFS server to the
-NFS client resulting in the timeout error seen above.
-</para>
- <para>NFS server provides a work-around that disables DNS requests, instead relying only on the client
-IP addresses for authentication. The following option can be added for successful mounting in
-such situations:
-</para>
- <para><command>option rpc-auth.addr.namelookup off </command></para>
- <para><note>
- <para>Note: Remember that disabling name lookup forces the NFS server to authenticate clients using only IP
-addresses. If the authentication rules in the volume file use hostnames, those authentication
-rules will fail and disallow mounting for those clients.
-</para>
- </note></para>
- <para>or</para>
- </listitem>
- <listitem>
- <para>NFS version used by the NFS client is other than version 3.
-</para>
- <para>Gluster NFS server supports version 3 of NFS protocol. In recent Linux kernels, the default NFS
-version has been changed from 3 to 4. It is possible that the client machine is unable to connect
-to the Gluster NFS server because it is using version 4 messages which are not understood by
-Gluster NFS server. The timeout can be resolved by forcing the NFS client to use version 3. The
-<emphasis role="bold">vers</emphasis> option to mount command is used for this purpose:
-</para>
- <para><command>$ mount <replaceable>nfsserver</replaceable><replaceable>:export</replaceable> -o vers=3 <replaceable>mount-point</replaceable></command>
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>showmount fails with clnt_create: RPC: Unable to receive </title>
- <para>Check your firewall settings to open port 111 for portmap requests/replies and Gluster NFS
-server requests/replies. Gluster NFS server operates over the following port numbers: 38465,
-38466, and 38467.
-</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-RPM"/>.
-</para>
- </section>
- <section>
- <title>Application fails with &quot;Invalid argument&quot; or &quot;Value too large for defined data type&quot; error. </title>
- <para>These two errors generally happen for 32-bit nfs clients or applications that do not support 64-bit
-inode numbers or large files.
-Use the following option from the CLI to make Gluster NFS return 32-bit inode numbers instead:
-nfs.enable-ino32 &lt;on|off&gt;
-</para>
- <para>Applications that will benefit are those that were either:
-</para>
- <itemizedlist>
- <listitem>
- <para>built 32-bit and run on 32-bit machines such that they do not support large files by default</para>
- </listitem>
- <listitem>
- <para>built 32-bit on 64-bit systems
-</para>
- </listitem>
- </itemizedlist>
- <para>This option is disabled by default so NFS returns 64-bit inode numbers by default.
-</para>
- <para>Applications which can be rebuilt from source are recommended to rebuild using the following
-flag with gcc:</para>
- <para><command> -D_FILE_OFFSET_BITS=64</command>
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting File Locks</title>
- <para>In GlusterFS 3.3 you can use <command>statedump</command> command to list the locks held on files. The statedump output also provides information on each lock with its range, basename, PID of the application holding the lock, and so on. You can analyze the output to know about the locks whose owner/application is no longer running or interested in that lock. After ensuring that no application is using the file, you can clear the lock using the following <command>clear lock</command> command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind {blocked | granted | all}{inode [range] | entry [basename] | posix [range]}</command></command></para>
- <para>For more information on performing <command>statedump</command>, see <xref linkend="sect-Administration_Guide-Monitor_Workload-Performing_Statedump"/></para>
- <para><emphasis role="bold">To identify locked file and clear locks</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Perform statedump on the volume to view the files that are locked using the following command:</para>
- <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> inode</command></para>
- <para>For example, to display statedump of test-volume:</para>
- <para><programlisting># gluster volume statedump test-volume
-Volume statedump successful</programlisting></para>
- <para>The statedump files are created on the brick servers in the<filename> /tmp</filename> directory or in the directory set using <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para>
- <para>The following are the sample contents of the statedump file. It indicates that GlusterFS has entered into a state where there is an entry lock (entrylk) and an inode lock (inodelk). Ensure that those are stale locks and no resources own them. </para>
- <para><screen>[xlator.features.locks.vol-locks.inode]
-path=/
-mandatory=0
-entrylk-count=1
-lock-dump.domain.domain=vol-replicate-0
-xlator.feature.locks.lock-dump.domain.entrylk.entrylk[0](ACTIVE)=type=ENTRYLK_WRLCK on basename=file1, pid = 714782904, owner=ffffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
-
-conn.2.bound_xl./gfs/brick1.hashsize=14057
-conn.2.bound_xl./gfs/brick1.name=/gfs/brick1/inode
-conn.2.bound_xl./gfs/brick1.lru_limit=16384
-conn.2.bound_xl./gfs/brick1.active_size=2
-conn.2.bound_xl./gfs/brick1.lru_size=0
-conn.2.bound_xl./gfs/brick1.purge_size=0
-
-[conn.2.bound_xl./gfs/brick1.active.1]
-gfid=538a3d4a-01b0-4d03-9dc9-843cd8704d07
-nlookup=1
-ref=2
-ia_type=1
-[xlator.features.locks.vol-locks.inode]
-path=/file1
-mandatory=0
-inodelk-count=1
-lock-dump.domain.domain=vol-replicate-0
-inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012</screen></para>
- </listitem>
- <listitem>
- <para>Clear the lock using the following command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted entry basename</command></command></para>
- <para>For example, to clear the entry lock on <filename>file1</filename> of test-volume:
-</para>
- <para><screen># gluster volume clear-locks test-volume / kind granted entry file1
-Volume clear-locks successful
-vol-locks: entry blocked locks=0 granted locks=1</screen></para>
- </listitem>
- <listitem>
- <para>Clear the inode lock using the following command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted inode range </command></command></para>
- <para>For example, to clear the inode lock on <filename>file1</filename> of test-volume:
-</para>
- <para><screen># gluster volume clear-locks test-volume /file1 kind granted inode 0,0-0
-Volume clear-locks successful
-vol-locks: inode blocked locks=0 granted locks=1</screen></para>
- <para>You can perform statedump on test-volume again to verify that the above inode and entry locks are cleared.</para>
- </listitem>
- </orderedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/gfs_introduction.xml b/doc/admin-guide/en-US/gfs_introduction.xml
deleted file mode 100644
index 5fd887305..000000000
--- a/doc/admin-guide/en-US/gfs_introduction.xml
+++ /dev/null
@@ -1,54 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Introducing Gluster File System</title>
- <para>GlusterFS is an open source, clustered file system capable of scaling to several petabytes and handling thousands of clients. GlusterFS can be flexibly combined with commodity physical, virtual, and cloud resources to deliver highly available and performant enterprise storage at a fraction of the cost of traditional solutions.</para>
- <para>GlusterFS clusters together storage building blocks over Infiniband RDMA and/or TCP/IP interconnect, aggregating disk and memory resources and managing data in a single global namespace. GlusterFS is based on a stackable user space design, delivering exceptional performance for diverse workloads.
-</para>
- <figure>
- <title>Virtualized Cloud Environments</title>
- <mediaobject>
- <textobject>
- <phrase>Virtualized Cloud Environments</phrase>
- </textobject>
- <imageobject>
- <imagedata align="center" fileref="images/640px-GlusterFS_3.2_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para>GlusterFS is designed for today&apos;s high-performance, virtualized cloud environments. Unlike traditional data centers, cloud environments require multi-tenancy along with the ability to grow or shrink resources on demand. Enterprises can scale capacity, performance, and availability on demand, with no vendor lock-in, across on-premise, public cloud, and hybrid environments. </para>
- <para>GlusterFS is in production at thousands of enterprises spanning media, healthcare, government, education, web 2.0, and financial services. The following table lists the commercial offerings and their documentation locations:
-</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1" colwidth="16%"/>
- <colspec colname="c2" colwidth="84%"/>
- <thead>
- <row>
- <entry>Product</entry>
- <entry>Documentation Location</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Red Hat Storage Software Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Virtual Storage Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Storage </entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html"/>
- </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
-</chapter>
diff --git a/doc/admin-guide/en-US/glossary.xml b/doc/admin-guide/en-US/glossary.xml
deleted file mode 100644
index 8c314feaa..000000000
--- a/doc/admin-guide/en-US/glossary.xml
+++ /dev/null
@@ -1,126 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Glossary</title>
- <glosslist>
- <glossentry>
- <glossterm>Brick</glossterm>
- <glossdef>
- <para>A Brick is the GlusterFS basic unit of storage, represented by an export directory on a server in the trusted storage pool. A Brick is expressed by combining a server with an export directory in the following format:</para>
- <para><code>SERVER:EXPORT</code></para>
- <para>For example:</para>
- <para><filename>myhostname:/exports/myexportdir/</filename></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Cluster</glossterm>
- <glossdef>
- <para>A cluster is a group of linked computers, working together closely thus in many respects forming a single computer.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Distributed File System</glossterm>
- <glossdef>
- <para>A file system that allows multiple clients to concurrently access data over a computer network.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Filesystem</glossterm>
- <glossdef>
- <para>A method of storing and organizing computer files and their data. Essentially, it organizes these files into a database for the storage, organization, manipulation, and retrieval by the computer&apos;s operating system.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>FUSE</glossterm>
- <glossdef>
- <para>Filesystem in Userspace (<acronym>FUSE</acronym>) is a loadable kernel module for Unix-like computer operating systems that lets non-privileged users create their own file systems without editing kernel code. This is achieved by running file system code in user space while the <acronym>FUSE</acronym> module provides only a &quot;bridge&quot; to the actual kernel interfaces.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem_in_Userspace">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Geo-Replication</glossterm>
- <glossdef>
- <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from site to another over Local Area Networks (<acronym>LAN</acronym>), Wide Area Network (<acronym>WAN</acronym>), and across the Internet.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>glusterd</glossterm>
- <glossdef>
- <para>The Gluster management daemon that needs to run on all servers in the trusted storage pool.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Metadata</glossterm>
- <glossdef>
- <para>Metadata is data providing information about one or more other pieces of data.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Namespace</glossterm>
- <glossdef>
- <para>Namespace is an abstract container or environment created to hold a logical grouping of unique identifiers or symbols. Each Gluster volume exposes a single namespace as a POSIX mount point that contains every file in the cluster.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Open Source</glossterm>
- <glossdef>
- <para>Open source describes practices in production and development that promote access to the end product&apos;s source materials. Some consider open source a philosophy, others consider it a pragmatic methodology.</para>
- <para>Before the term open source became widely adopted, developers and producers used a variety of phrases to describe the concept; open source gained hold with the rise of the Internet, and the attendant need for massive retooling of the computing source code.</para>
- <para>Opening the source code enabled a self-enhancing diversity of production models, communication paths, and interactive communities. Subsequently, a new, three-word phrase &quot;open source software&quot; was born to describe the environment that the new copyright, licensing, domain, and consumer issues created.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Open_source">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Petabyte</glossterm>
- <glossdef>
- <para>A petabyte (derived from the SI prefix peta- ) is a unit of information equal to one quadrillion (short scale) bytes, or 1000 terabytes. The unit symbol for the petabyte is PB. The prefix peta- (P) indicates a power of 1000:</para>
- <para>1 PB = 1,000,000,000,000,000 B = 10005 B = 1015 B.</para>
- <para>The term &quot;pebibyte&quot; (<acronym>PiB</acronym>), using a binary prefix, is used for the corresponding power of 1024.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Petabyte">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>POSIX</glossterm>
- <glossdef>
- <para>Portable Operating System Interface (for Unix) is the name of a family of related standards specified by the IEEE to define the application programming interface (<acronym>API</acronym>), along with shell and utilities interfaces for software compatible with variants of the Unix operating system. Gluster exports a fully <acronym>POSIX</acronym> compliant file system.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>RAID</glossterm>
- <glossdef>
- <para>Redundant Array of Inexpensive Disks (<acronym>RAID</acronym>) is a technology that provides increased storage reliability through redundancy, combining multiple low-cost, less-reliable disk drives components into a logical unit where all drives in the array are interdependent.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>RRDNS</glossterm>
- <glossdef>
- <para>Round Robin Domain Name Service (<acronym>RRDNS</acronym>) is a method to distribute load across application servers. <acronym>RRDNS</acronym> is implemented by creating multiple A records with the same name and different IP addresses in the zone file of a DNS server.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Trusted Storage Pool</glossterm>
- <glossdef>
- <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Userspace</glossterm>
- <glossdef>
- <para>Applications running in user space don’t directly interact with hardware, instead using the kernel to moderate access. Userspace applications are generally more portable than applications in kernel space. Gluster is a user space application.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Volfile</glossterm>
- <glossdef>
- <para>Volfile is a configuration file used by glusterfs process. Volfile will be usually located at <filename>/etc/glusterd/vols/VOLNAME</filename>.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Volume</glossterm>
- <glossdef>
- <para>A volume is a logical collection of bricks. Most of the gluster management operations happen on the volume.</para>
- </glossdef>
- </glossentry>
- </glosslist>
-</chapter>
diff --git a/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png b/doc/admin-guide/en-US/images/640px-GlusterFS_Architecture.png
index 95f89ec82..95f89ec82 100644
--- a/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png
+++ b/doc/admin-guide/en-US/images/640px-GlusterFS_Architecture.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png b/doc/admin-guide/en-US/images/GlusterFS_Architecture.png
index b506db1f4..b506db1f4 100644
--- a/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png
+++ b/doc/admin-guide/en-US/images/GlusterFS_Architecture.png
Binary files differ
diff --git a/doc/admin-guide/en-US/images/arhitecture.png b/doc/admin-guide/en-US/images/arhitecture.png
deleted file mode 100644
index 4e5188bf8..000000000
--- a/doc/admin-guide/en-US/images/arhitecture.png
+++ /dev/null
@@ -1,13 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<title>HTTP Error 403</title>
-</head>
-<body>
-<h1>Error 403</h1>
-<p>We're sorry, but we could not fulfill your request for
-/community/documentation/index.php/Image:GlusterFS_3.2_Architecture.png on this server.</p>
-<p>An invalid request was received from your browser. This may be caused by a malfunctioning proxy server or browser privacy software.</p>
-<p>Your technical support key is: <strong>7ab5-0b6a-1756-6707</strong></p>
-<p>You can use this key to <a href="http://www.ioerror.us/bb2-support-key?key=7ab5-0b6a-1756-6707">fix this problem yourself</a>.</p>
-<p>If you are unable to fix the problem yourself, please contact <a href="mailto:webmaster+nospam@nospam.gluster.com">webmaster at gluster.com</a> and be sure to provide the technical support key shown above.</p>
diff --git a/doc/admin-guide/en-US/markdown/Administration_Guide.md b/doc/admin-guide/en-US/markdown/Administration_Guide.md
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Administration_Guide.md
@@ -0,0 +1 @@
+
diff --git a/doc/admin-guide/en-US/markdown/Author_Group.md b/doc/admin-guide/en-US/markdown/Author_Group.md
new file mode 100644
index 000000000..ef2a5e677
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Author_Group.md
@@ -0,0 +1,5 @@
+Divya
+Muntimadugu
+Red Hat
+Engineering Content Services
+divya@redhat.com
diff --git a/doc/admin-guide/en-US/markdown/Book_Info.md b/doc/admin-guide/en-US/markdown/Book_Info.md
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Book_Info.md
@@ -0,0 +1 @@
+
diff --git a/doc/admin-guide/en-US/markdown/Chapter.md b/doc/admin-guide/en-US/markdown/Chapter.md
new file mode 100644
index 000000000..8420259c4
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Chapter.md
@@ -0,0 +1,18 @@
+Test Chapter
+============
+
+This is a test paragraph
+
+Test Section 1
+==============
+
+This is a test paragraph in a section
+
+Test Section 2
+==============
+
+This is a test paragraph in Section 2
+
+1. listitem text
+
+
diff --git a/doc/admin-guide/en-US/markdown/Preface.md b/doc/admin-guide/en-US/markdown/Preface.md
new file mode 100644
index 000000000..f7e934ae8
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Preface.md
@@ -0,0 +1,22 @@
+Preface
+=======
+
+This guide describes how to configure, operate, and manage Gluster File
+System (GlusterFS).
+
+Audience
+========
+
+This guide is intended for Systems Administrators interested in
+configuring and managing GlusterFS.
+
+This guide assumes that you are familiar with the Linux operating
+system, concepts of File System, GlusterFS concepts, and GlusterFS
+Installation.
+
+License
+=======
+
+The License information is available at <http://www.redhat.com/licenses/rhel_rha_eula.html>.
+
+ []: http://www.redhat.com/licenses/rhel_rha_eula.html
diff --git a/doc/admin-guide/en-US/markdown/Revision_History.md b/doc/admin-guide/en-US/markdown/Revision_History.md
new file mode 100644
index 000000000..2084309d1
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/Revision_History.md
@@ -0,0 +1,4 @@
+Revision History
+================
+
+1-0 Thu Apr 5 2012 Divya Muntimadugu <divya@redhat.com> Draft
diff --git a/doc/admin-guide/en-US/markdown/admin_ACLs.md b/doc/admin-guide/en-US/markdown/admin_ACLs.md
new file mode 100644
index 000000000..308e069ca
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_ACLs.md
@@ -0,0 +1,197 @@
+POSIX Access Control Lists
+==========================
+
+POSIX Access Control Lists (ACLs) allow you to assign different
+permissions for different users or groups even though they do not
+correspond to the original owner or the owning group.
+
+For example: User john creates a file but does not want to allow anyone
+to do anything with this file, except another user, antony (even though
+there are other users that belong to the group john).
+
+This means, in addition to the file owner, the file group, and others,
+additional users and groups can be granted or denied access by using
+POSIX ACLs.
+
+Activating POSIX ACLs Support
+=============================
+
+To use POSIX ACLs for a file or directory, the partition of the file or
+directory must be mounted with POSIX ACLs support.
+
+Activating POSIX ACLs Support on Server
+---------------------------------------
+
+To mount the backend export directories for POSIX ACLs support, use the
+following command:
+
+`# mount -o acl `
+
+For example:
+
+`# mount -o acl /dev/sda1 /export1 `
+
+Alternatively, if the partition is listed in the /etc/fstab file, add
+the following entry for the partition to include the POSIX ACLs option:
+
+`LABEL=/work /export1 ext3 rw,acl 1 4 `
+
+Activating POSIX ACLs Support on Client
+---------------------------------------
+
+To mount the glusterfs volumes for POSIX ACLs support, use the following
+command:
+
+`# mount -t glusterfs -o acl `
+
+For example:
+
+`# mount -t glusterfs -o acl 198.192.198.234:glustervolume /mnt/gluster`
+
+Setting POSIX ACLs
+==================
+
+You can set two types of POSIX ACLs, that is, access ACLs and default
+ACLs. You can use access ACLs to grant permission for a specific file or
+directory. You can use default ACLs only on a directory, but if a file
+inside that directory does not have an ACL, it inherits the permissions
+of the default ACLs of the directory.
+
+You can set ACLs per user, per group, for users not in the user
+group for the file, and via the effective right mask.
+
+Setting Access ACLs
+-------------------
+
+You can apply access ACLs to grant permission for both files and
+directories.
+
+**To set or modify Access ACLs**
+
+To set or modify access ACLs, use the following command:
+
+`# setfacl -m file `
+
+The ACL entry types are the POSIX ACLs representations of owner, group,
+and other.
+
+Permissions must be a combination of the characters `r` (read), `w`
+(write), and `x` (execute). You must specify the ACL entry in the
+following format and can specify multiple entry types separated by
+commas.
+
+ ACL Entry Description
+ ---------------------- --------------------------------------------------------------------------------------------------------------------------------------------------
+ u:uid:\<permission\> Sets the access ACLs for a user. You can specify user name or UID
+ g:gid:\<permission\> Sets the access ACLs for a group. You can specify group name or GID.
+ m:\<permission\> Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries.
+ o:\<permission\> Sets the access ACLs for users other than the ones in the group for the file.
+
+If a file or directory already has POSIX ACLs, and the setfacl
+command is used, the additional permissions are added to the existing
+POSIX ACLs or the existing rule is modified.
+
+For example, to give read and write permissions to user antony:
+
+`# setfacl -m u:antony:rw /mnt/gluster/data/testfile `
+
+Setting Default ACLs
+--------------------
+
+You can apply default ACLs only to directories. They determine the
+permissions that a file system object inherits from its parent
+directory when it is created.
+
+To set default ACLs
+
+You can set default ACLs for files and directories using the following
+command:
+
+`# setfacl -m --set `
+
+For example, to set the default ACLs for the /data directory to read for
+users not in the user group:
+
+`# setfacl -m --set o::r /mnt/gluster/data `
+
+> **Note**
+>
+> Access ACLs set for an individual file can override the default
+> ACLs permissions.
+
+**Effects of a Default ACLs**
+
+The following are the ways in which the permissions of a directory's
+default ACLs are passed to the files and subdirectories in it:
+
+- A subdirectory inherits the default ACLs of the parent directory
+ both as its default ACLs and as its access ACLs.
+
+- A file inherits the default ACLs as its access ACLs.
+
+Retrieving POSIX ACLs
+=====================
+
+You can view the existing POSIX ACLs for a file or directory.
+
+**To view existing POSIX ACLs**
+
+- View the existing access ACLs of a file using the following command:
+
+ `# getfacl `
+
+ For example, to view the existing POSIX ACLs for sample.jpg
+
+ # getfacl /mnt/gluster/data/test/sample.jpg
+ # owner: antony
+ # group: antony
+ user::rw-
+ group::rw-
+ other::r--
+
+- View the default ACLs of a directory using the following command:
+
+ `# getfacl `
+
+ For example, to view the existing ACLs for /data/doc
+
+ # getfacl /mnt/gluster/data/doc
+ # owner: antony
+ # group: antony
+ user::rw-
+ user:john:r--
+ group::r--
+ mask::r--
+ other::r--
+ default:user::rwx
+ default:user:antony:rwx
+ default:group::r-x
+ default:mask::rwx
+ default:other::r-x
+
+Removing POSIX ACLs
+===================
+
+To remove all the permissions for a user, groups, or others, use the
+following command:
+
+`# setfacl -x `
+
+For example, to remove all permissions from the user antony:
+
+`# setfacl -x u:antony /mnt/gluster/data/test-file`
+
+Samba and ACLs
+==============
+
+If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs
+are enabled by default. Samba has been compiled with the
+`--with-acl-support` option, so no special flags are required when
+accessing or mounting a Samba share.
+
+NFS and ACLs
+============
+
+Currently we do not support ACLs configuration through NFS, i.e. setfacl
+and getfacl commands do not work. However, ACLs permissions set using
+Gluster Native Client are applicable on NFS mounts.
diff --git a/doc/admin-guide/en-US/markdown/admin_Hadoop.md b/doc/admin-guide/en-US/markdown/admin_Hadoop.md
new file mode 100644
index 000000000..2894fa713
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_Hadoop.md
@@ -0,0 +1,170 @@
+Managing Hadoop Compatible Storage
+==================================
+
+GlusterFS provides compatibility for Apache Hadoop and it uses the
+standard file system APIs available in Hadoop to provide a new storage
+option for Hadoop deployments. Existing MapReduce based applications can
+use GlusterFS seamlessly. This new functionality opens up data within
+Hadoop deployments to any file-based or object-based application.
+
+Architecture Overview
+=====================
+
+The following diagram illustrates Hadoop integration with GlusterFS:
+
+Advantages
+==========
+
+The following are the advantages of Hadoop Compatible Storage with
+GlusterFS:
+
+- Provides simultaneous file-based and object-based access within
+ Hadoop.
+
+- Eliminates the centralized metadata server.
+
+- Provides compatibility with MapReduce applications and rewrite is
+ not required.
+
+- Provides a fault tolerant file system.
+
+Preparing to Install Hadoop Compatible Storage
+==============================================
+
+This section provides information on pre-requisites and list of
+dependencies that will be installed during installation of Hadoop
+compatible storage.
+
+Pre-requisites
+--------------
+
+The following are the pre-requisites to install Hadoop Compatible
+Storage:
+
+- Hadoop 0.20.2 is installed, configured, and is running on all the
+ machines in the cluster.
+
+- Java Runtime Environment
+
+- Maven (mandatory only if you are building the plugin from the
+ source)
+
+- JDK (mandatory only if you are building the plugin from the source)
+
+- getfattr - command line utility
+
+Installing and Configuring Hadoop Compatible Storage
+====================================================
+
+This section describes how to install and configure Hadoop Compatible
+Storage in your storage environment and verify that it is functioning
+correctly.
+
+1. Download `glusterfs-hadoop-0.20.2-0.1.x86_64.rpm` file to each
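+ server on your cluster. You can download the file from <http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm>.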
+
+2. To install Hadoop Compatible Storage on all servers in your cluster,
+ run the following command:
+
+ `# rpm -ivh --nodeps glusterfs-hadoop-0.20.2-0.1.x86_64.rpm`
+
+ The following files will be extracted:
+
+ - /usr/local/lib/glusterfs-Hadoop-version-gluster\_plugin\_version.jar
+
+ - /usr/local/lib/conf/core-site.xml
+
+3. (Optional) To install Hadoop Compatible Storage in a different
+ location, run the following command:
+
+ `# rpm -ivh --nodeps --prefix /usr/local/glusterfs/hadoop glusterfs-hadoop-0.20.2-0.1.x86_64.rpm`
+
+4. Edit the `conf/core-site.xml` file. The following is the sample
+ `conf/core-site.xml` file:
+
+ <configuration>
+ <property>
+ <name>fs.glusterfs.impl</name>
+ <value>org.apache.hadoop.fs.glusterfs.GlusterFileSystem</value>
+ </property>
+
+ <property>
+ <name>fs.default.name</name>
+ <value>glusterfs://fedora1:9000</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.volname</name>
+ <value>hadoopvol</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.mount</name>
+ <value>/mnt/glusterfs</value>
+ </property>
+
+ <property>
+ <name>fs.glusterfs.server</name>
+ <value>fedora2</value>
+ </property>
+
+ <property>
+ <name>quick.slave.io</name>
+ <value>Off</value>
+ </property>
+ </configuration>
+
+ The following are the configurable fields:
+
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Property Name Default Value Description
+ ---------------------- -------------------------- ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ fs.default.name glusterfs://fedora1:9000 Any hostname in the cluster as the server and any port number.
+
+ fs.glusterfs.volname hadoopvol GlusterFS volume to mount.
+
+ fs.glusterfs.mount /mnt/glusterfs The directory used to fuse mount the volume.
+
+ fs.glusterfs.server fedora2 Any hostname or IP address on the cluster except the client/master.
+
+ quick.slave.io Off Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster.
+ > **Note**
+ >
+ > This option is not tested widely
+ -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+5. Create a soft link in Hadoop’s library and configuration directory
+ for the downloaded files (in Step 3) using the following commands:
+
+ `# ln -s >`
+
+ For example,
+
+ `# ln -s /usr/local/lib/glusterfs-0.20.2-0.1.jar /lib/glusterfs-0.20.2-0.1.jar`
+
+ `# ln -s /usr/local/lib/conf/core-site.xml /conf/core-site.xml `
+
+6. (Optional) You can run the following command on Hadoop master to
+ build the plugin and deploy it along with core-site.xml file,
+ instead of repeating the above steps:
+
+ `# build-deploy-jar.py -d -c `
+
+Starting and Stopping the Hadoop MapReduce Daemon
+=================================================
+
+To start and stop MapReduce daemon
+
+- To start MapReduce daemon manually, enter the following command:
+
+ `# /bin/start-mapred.sh`
+
+- To stop MapReduce daemon manually, enter the following command:
+
+ `# /bin/stop-mapred.sh `
+
+> **Note**
+>
+> You must start Hadoop MapReduce daemon on all servers.
+
+ []: http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm
diff --git a/doc/admin-guide/en-US/markdown/admin_UFO.md b/doc/admin-guide/en-US/markdown/admin_UFO.md
new file mode 100644
index 000000000..3311eff01
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_UFO.md
@@ -0,0 +1,1219 @@
+Managing Unified File and Object Storage
+========================================
+
+Unified File and Object Storage (UFO) unifies NAS and object storage
+technology. It provides a system for data storage that enables users to
+access the same data, both as an object and as a file, thus simplifying
+management and controlling storage costs.
+
+Unified File and Object Storage is built upon OpenStack's Object Storage
+(Swift). OpenStack Object Storage allows users to store and retrieve
+files and content as objects through a simple Web Service (REST:
+Representational State Transfer) interface, while GlusterFS allows users
+to store and retrieve files using native FUSE and NFS mounts. UFO uses
+GlusterFS as a backend file system for OpenStack Swift. It also
+leverages OpenStack Swift's web interface for storing and retrieving
+files over the web, combined with GlusterFS features like scalability,
+high availability, replication, and elastic volume management for data
+management at disk level.
+
+Unified File and Object Storage technology enables enterprises to adopt
+and deploy cloud storage solutions. It allows users to access and modify
+data as objects from a REST interface along with the ability to access
+and modify files from NAS interfaces including NFS and CIFS. In addition
+to decreasing cost and making it faster and easier to access object
+data, it also delivers massive scalability, high availability and
+replication of object storage. Infrastructure as a Service (IaaS)
+providers can utilize GlusterFS Unified File and Object Storage
+technology to enable their own cloud storage service. Enterprises can
+use this technology to accelerate the process of preparing file-based
+applications for the cloud and simplify new application development for
+cloud computing environments.
+
+OpenStack Object Storage is a scalable object storage system and it is not
+a traditional file system. You will not be able to mount this system
+like traditional SAN or NAS volumes and perform POSIX compliant
+operations.
+
+Components of Object Storage
+============================
+
+The major components of Object Storage are:
+
+**Proxy Server**
+
+All REST requests to the UFO are routed through the Proxy Server.
+
+**Objects and Containers**
+
+An object is the basic storage entity and any optional metadata that
+represents the data you store. When you upload data, the data is stored
+as-is (with no compression or encryption).
+
+A container is a storage compartment for your data and provides a way
+for you to organize your data. Containers can be visualized as
+directories in a Linux system. Data must be stored in a container and
+hence objects are created within a container.
+
+It implements objects as files and directories under the container. The
+object name is a '/' separated path and UFO maps it to directories until
+the last name in the path, which is marked as a file. With this
+approach, objects can be accessed as files and directories from native
+GlusterFS (FUSE) or NFS mounts by providing the '/' separated path.
+
+**Accounts and Account Servers**
+
+The OpenStack Object Storage system is designed to be used by many
+different storage consumers. Each user is associated with one or more
+accounts and must identify themselves using an authentication system.
+While authenticating, users must provide the name of the account for
+which the authentication is requested.
+
+UFO implements accounts as GlusterFS volumes. So, when a user is granted
+read/write permission on an account, it means that that user has access
+to all the data available on that GlusterFS volume.
+
+**Authentication and Access Permissions**
+
+You must authenticate against an authentication service to receive
+OpenStack Object Storage connection parameters and an authentication
+token. The token must be passed in for all subsequent container or
+object operations. One authentication service that you can use as a
+middleware example is called `tempauth`.
+
+By default, each user has their own storage account and has full access
+to that account. Users must authenticate with their credentials as
+described above, but once authenticated they can manage containers and
+objects within that account. If a user wants to access the content from
+another account, they must have an API access key or a session token
+provided by their authentication system.
+
+Advantages of using GlusterFS Unified File and Object Storage
+=============================================================
+
+The following are the advantages of using GlusterFS UFO:
+
+- No limit on upload and download file sizes as compared to Open
+ Stack Swift which limits the object size to 5GB.
+
+- A unified view of data across NAS and Object Storage technologies.
+
+- Using GlusterFS's UFO has other advantages like the following:
+
+ - High availability
+
+ - Scalability
+
+ - Replication
+
+ - Elastic Volume management
+
+Preparing to Deploy Unified File and Object Storage
+===================================================
+
+This section provides information on pre-requisites and list of
+dependencies that will be installed during the installation of Unified
+File and Object Storage.
+
+Pre-requisites
+--------------
+
+GlusterFS's Unified File and Object Storage needs `user_xattr` support
+from the underlying disk file system. Use the following command to
+enable `user_xattr` for GlusterFS bricks backend:
+
+`# mount -o remount,user_xattr <device name>`
+
+For example,
+
+`# mount -o remount,user_xattr /dev/hda1`
+
+Dependencies
+------------
+
+The following packages are installed on GlusterFS when you install
+Unified File and Object Storage:
+
+- curl
+
+- memcached
+
+- openssl
+
+- xfsprogs
+
+- python2.6
+
+- pyxattr
+
+- python-configobj
+
+- python-setuptools
+
+- python-simplejson
+
+- python-webob
+
+- python-eventlet
+
+- python-greenlet
+
+- python-pastedeploy
+
+- python-netifaces
+
+Installing and Configuring Unified File and Object Storage
+==========================================================
+
+This section provides instructions on how to install and configure
+Unified File and Object Storage in your storage environment.
+
+Installing Unified File and Object Storage
+------------------------------------------
+
+To install Unified File and Object Storage:
+
+1. Download `rhel_install.sh` install script from [][] .
+
+2. Run `rhel_install.sh` script using the following command:
+
+ `# sh rhel_install.sh`
+
+3. Download `swift-1.4.5-1.noarch.rpm` and
+ `swift-plugin-1.0.-1.el6.noarch.rpm` files from [][].
+
+4. Install `swift-1.4.5-1.noarch.rpm` and
+ `swift-plugin-1.0.-1.el6.noarch.rpm` using the following commands:
+
+ `# rpm -ivh swift-1.4.5-1.noarch.rpm`
+
+ `# rpm -ivh swift-plugin-1.0.-1.el6.noarch.rpm`
+
+ > **Note**
+ >
+ > You must repeat the above steps on all the machines on which you
+ > want to install Unified File and Object Storage. If you install
+ > the Unified File and Object Storage on multiple servers, you can
+ > use a load balancer like pound, nginx, and so on to distribute the
+ > request across the machines.
+
+Adding Users
+------------
+
+The authentication system allows the administrator to grant different
+levels of access to different users based on the requirement. The
+following are the types of user permissions:
+
+- admin user
+
+- normal user
+
+Admin user has read and write permissions on the account. By default, a
+normal user has no read or write permissions. A normal user can only
+authenticate itself to get an Auth-Token. Read or write permissions are
+provided through ACLs by the admin users.
+
+Add a new user by adding the following entry in
+`/etc/swift/proxy-server.conf` file:
+
+`user_<account-name>_<user-name> = <password> [.admin]`
+
+For example,
+
+`user_test_tester = testing .admin`
+
+> **Note**
+>
+> During installation, the installation script adds a few sample users to
+> the `proxy-server.conf` file. It is highly recommended that you remove
+> all the default sample user entries from the configuration file.
+
+For more information on setting ACLs, see the "Setting ACLs on Container" section.
+
+Configuring Proxy Server
+------------------------
+
+The Proxy Server is responsible for connecting to the rest of the
+OpenStack Object Storage architecture. For each request, it looks up the
+location of the account, container, or object in the ring and routes the
+request accordingly. The public API is also exposed through the proxy
+server. When objects are streamed to or from an object server, they are
+streamed directly through the proxy server to or from the user – the
+proxy server does not spool them.
+
+The configurable options pertaining to proxy server are stored in
+`/etc/swift/proxy-server.conf`. The following is the sample
+`proxy-server.conf` file:
+
+ [app:proxy-server]
+ use = egg:swift#proxy
+ allow_account_management=true
+ account_autocreate=true
+
+ [filter:tempauth]
+ use = egg:swift#tempauth
+ user_admin_admin = admin .admin .reseller_admin
+ user_test_tester = testing .admin
+ user_test2_tester2 = testing2 .admin
+ user_test_tester3 = testing3
+
+ [filter:healthcheck]
+ use = egg:swift#healthcheck
+
+ [filter:cache]
+ use = egg:swift#memcache
+
+By default, GlusterFS's Unified File and Object Storage is configured to
+support HTTP protocol and uses temporary authentication to authenticate
+the HTTP requests.
+
+Configuring Authentication System
+---------------------------------
+
+Proxy server must be configured to authenticate using `tempauth`.
+
+Configuring Proxy Server for HTTPS
+----------------------------------
+
+By default, proxy server only handles HTTP requests. To configure the
+proxy server to process HTTPS requests, perform the following steps:
+
+1. Create self-signed cert for SSL using the following commands:
+
+ cd /etc/swift
+ openssl req -new -x509 -nodes -out cert.crt -keyout cert.key
+
+2. Add the following lines to `/etc/swift/proxy-server.conf` under
+ the `[DEFAULT]` section:
+
+ bind_port = 443
+ cert_file = /etc/swift/cert.crt
+ key_file = /etc/swift/cert.key
+
+3. Restart the servers using the following commands:
+
+ swift-init main stop
+ swift-init main start
+
+The following are the configurable options:
+
+ Option Default Description
+ ------------ ------------ -------------------------------
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 80 Port for server to bind
+ swift\_dir /etc/swift Swift configuration directory
+ workers 1 Number of workers to fork
+ user swift swift user
+ cert\_file Path to the ssl .crt
+ key\_file Path to the ssl .key
+
+ : proxy-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ ------------------------------- ----------------- -----------------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the proxy server. For most cases, this should be `egg:swift#proxy`.
+ log\_name proxy-server Label used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Log level
+ log\_headers True If True, log headers in each request
+ recheck\_account\_existence 60 Cache timeout in seconds to send memcached for account existence
+ recheck\_container\_existence 60 Cache timeout in seconds to send memcached for container existence
+ object\_chunk\_size 65536 Chunk size to read from object servers
+ client\_chunk\_size 65536 Chunk size to read from clients
+ memcache\_servers 127.0.0.1:11211 Comma separated list of memcached servers ip:port
+ node\_timeout 10 Request timeout to external services
+ client\_timeout 60 Timeout to read one chunk from a client
+ conn\_timeout 0.5 Connection timeout to external services
+ error\_suppression\_interval 60 Time in seconds that must elapse since the last error for a node to be considered no longer error limited
+ error\_suppression\_limit 10 Error count to consider a node error limited
+ allow\_account\_management false Whether account `PUT`s and `DELETE`s are even callable
+
+ : proxy-server.conf Server Options in the [proxy-server] section
+
+Configuring Object Server
+-------------------------
+
+The Object Server is a very simple blob storage server that can store,
+retrieve, and delete objects stored on local devices. Objects are stored
+as binary files on the file system with metadata stored in the file’s
+extended attributes (xattrs). This requires that the underlying file
+system choice for object servers support xattrs on files.
+
+The configurable options pertaining to Object Server are stored in the file
+`/etc/swift/object-server/1.conf`. The following is the sample
+`object-server/1.conf` file:
+
+ [DEFAULT]
+ devices = /srv/1/node
+ mount_check = false
+ bind_port = 6010
+ user = root
+ log_facility = LOG_LOCAL2
+
+ [pipeline:main]
+ pipeline = gluster object-server
+
+ [app:object-server]
+ use = egg:swift#object
+
+ [filter:gluster]
+ use = egg:swift#gluster
+
+ [object-replicator]
+ vm_test_mode = yes
+
+ [object-updater]
+ [object-auditor]
+
+The following are the configurable options:
+
+ Option Default Description
+ -------------- ------------ ----------------------------------------------------------------------------------------------------
+ swift\_dir /etc/swift Swift configuration directory
+ devices /srv/node Mount parent directory where devices are mounted
+ mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 6000 Port for server to bind
+ workers 1 Number of workers to fork
+
+ : object-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ ---------------------- --------------- ----------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the object server. For most cases, this should be `egg:swift#object`.
+ log\_name object-server log name used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Logging level
+ log\_requests True Whether or not to log each request
+ user swift swift user
+ node\_timeout 3 Request timeout to external services
+ conn\_timeout 0.5 Connection timeout to external services
+ network\_chunk\_size 65536 Size of chunks to read or write over the network
+ disk\_chunk\_size 65536 Size of chunks to read or write to disk
+ max\_upload\_time 65536 Maximum time allowed to upload an object
+ slow 0 If \> 0, Minimum time in seconds for a `PUT` or `DELETE` request to complete
+
+ : object-server.conf Server Options in the [object-server] section
+
+Configuring Container Server
+----------------------------
+
+The Container Server’s primary job is to handle listings of objects. The
+listing is done by querying the GlusterFS mount point with path. This
+query returns a list of all files and directories present under that
+container.
+
+The configurable options pertaining to container server are stored in
+`/etc/swift/container-server/1.conf` file. The following is the sample
+`container-server/1.conf` file:
+
+ [DEFAULT]
+ devices = /srv/1/node
+ mount_check = false
+ bind_port = 6011
+ user = root
+ log_facility = LOG_LOCAL2
+
+ [pipeline:main]
+ pipeline = gluster container-server
+
+ [app:container-server]
+ use = egg:swift#container
+
+ [filter:gluster]
+ use = egg:swift#gluster
+
+ [container-replicator]
+ [container-updater]
+ [container-auditor]
+
+The following are the configurable options:
+
+ Option Default Description
+ -------------- ------------ ----------------------------------------------------------------------------------------------------
+ swift\_dir /etc/swift Swift configuration directory
+ devices /srv/node Mount parent directory where devices are mounted
+ mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 6001 Port for server to bind
+ workers 1 Number of workers to fork
+ user swift Swift user
+
+ : container-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ --------------- ------------------ ----------------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`.
+ log\_name container-server Label used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Logging level
+ node\_timeout 3 Request timeout to external services
+ conn\_timeout 0.5 Connection timeout to external services
+
+ : container-server.conf Server Options in the [container-server]
+ section
+
+Configuring Account Server
+--------------------------
+
+The Account Server is very similar to the Container Server, except that
+it is responsible for listing of containers rather than objects. In UFO,
+each gluster volume is an account.
+
+The configurable options pertaining to account server are stored in
+`/etc/swift/account-server/1.conf` file. The following is the sample
+`account-server/1.conf` file:
+
+ [DEFAULT]
+ devices = /srv/1/node
+ mount_check = false
+ bind_port = 6012
+ user = root
+ log_facility = LOG_LOCAL2
+
+ [pipeline:main]
+ pipeline = gluster account-server
+
+ [app:account-server]
+ use = egg:swift#account
+
+ [filter:gluster]
+ use = egg:swift#gluster
+
+ [account-replicator]
+ vm_test_mode = yes
+
+ [account-auditor]
+ [account-reaper]
+
+The following are the configurable options:
+
+ Option Default Description
+ -------------- ------------ ----------------------------------------------------------------------------------------------------
+ swift\_dir /etc/swift Swift configuration directory
+ devices /srv/node mount parent directory where devices are mounted
+ mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device
+ bind\_ip 0.0.0.0 IP Address for server to bind
+ bind\_port 6002 Port for server to bind
+ workers 1 Number of workers to fork
+ user swift Swift user
+
+ : account-server.conf Default Options in the [DEFAULT] section
+
+ Option Default Description
+ --------------- ---------------- ----------------------------------------------------------------------------------------------------------
+ use paste.deploy entry point for the account server. For most cases, this should be `egg:swift#account`.
+ log\_name account-server Label used when logging
+ log\_facility LOG\_LOCAL0 Syslog log facility
+ log\_level INFO Logging level
+
+ : account-server.conf Server Options in the [account-server] section
+
+Starting and Stopping Server
+----------------------------
+
+You must start the server manually when the system reboots and whenever you
+update/modify the configuration files.
+
+- To start the server, enter the following command:
+
+ `# swift-init main start`
+
+- To stop the server, enter the following command:
+
+ `# swift-init main stop`
+
+Working with Unified File and Object Storage
+============================================
+
+This section describes the REST API for administering and managing
+Object Storage. All requests will be directed to the host and URL
+described in the `X-Storage-URL` HTTP header obtained during successful
+authentication.
+
+Configuring Authenticated Access
+--------------------------------
+
+Authentication is the process of proving identity to the system. To use
+the REST interface, you must obtain an authorization token using GET
+method and supply it with v1.0 as the path.
+
+Each REST request against the Object Storage system requires the
+addition of a specific authorization token HTTP x-header, defined as
+X-Auth-Token. The storage URL and authentication token are returned in
+the headers of the response.
+
+- To authenticate, run the following command:
+
+ GET auth/v1.0 HTTP/1.1
+ Host: <auth URL>
+ X-Auth-User: <account name>:<user name>
+ X-Auth-Key: <user-Password>
+
+ For example,
+
+ GET auth/v1.0 HTTP/1.1
+ Host: auth.example.com
+ X-Auth-User: test:tester
+ X-Auth-Key: testing
+
+ HTTP/1.1 200 OK
+ X-Storage-Url: https://example.storage.com:443/v1/AUTH_test
+ X-Storage-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
+ X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
+ Content-Length: 0
+ Date: Wed, 10 Jul 2011 06:11:51 GMT
+
+ To authenticate access using cURL (for the above example), run the
+ following command:
+
+ curl -v -H 'X-Storage-User: test:tester' -H 'X-Storage-Pass:testing' -k
+ https://auth.example.com:443/auth/v1.0
+
+ The X-Storage-Url has to be parsed and used in the connection and
+ request line of all subsequent requests to the server. In the
+ example output, users connecting to server will send most
+ container/object requests with a host header of example.storage.com
+ and the request line's version and account as v1/AUTH\_test.
+
+> **Note**
+>
+> The authentication tokens are valid for a 24 hour period.
+
+Working with Accounts
+---------------------
+
+This section describes the list of operations you can perform at the
+account level of the URL.
+
+### Displaying Container Information
+
+You can list the objects of a specific container, or all containers, as
+needed using GET command. You can use the following optional parameters
+with GET request to refine the results:
+
+ Parameter Description
+ ----------- --------------------------------------------------------------------------
+ limit Limits the number of results to at most *n* value.
+ marker Returns object names greater in value than the specified marker.
+ format Specify either json or xml to return the respective serialized response.
+
+**To display container information**
+
+- List all the containers of an account using the following command:
+
+ GET /<apiversion>/<account> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ GET /v1/AUTH_test HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 200 Ok
+ Date: Wed, 13 Jul 2011 16:32:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+ Content-Length: 39
+
+ songs
+ movies
+ documents
+ reports
+
+To display container information using cURL (for the above example), run
+the following command:
+
+ curl -v -X GET -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test -k
+
+### Displaying Account Metadata Information
+
+You can issue HEAD command to the storage service to view the number of
+containers and the total bytes stored in the account.
+
+- To display containers and storage used, run the following command:
+
+ HEAD /<apiversion>/<account> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ HEAD /v1/AUTH_test HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 16:52:21 GMT
+ Server: Apache
+ X-Account-Container-Count: 4
+ X-Account-Total-Bytes-Used: 394792
+
+ To display account metadata information using cURL (for the above
+ example), run the following command:
+
+ curl -v -X HEAD -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test -k
+
+Working with Containers
+-----------------------
+
+This section describes the list of operations you can perform at the
+container level of the URL.
+
+### Creating Containers
+
+You can use PUT command to create containers. Containers are the storage
+folders for your data. The URL encoded name must be less than 256 bytes
+and cannot contain a forward slash '/' character.
+
+- To create a container, run the following command:
+
+ PUT /<apiversion>/<account>/<container>/ HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ PUT /v1/AUTH_test/pictures/ HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 201 Created
+ Date: Wed, 13 Jul 2011 17:32:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To create container using cURL (for the above example), run the
+ following command:
+
+ curl -v -X PUT -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures -k
+
+ The status code of 201 (Created) indicates that you have
+ successfully created the container. If a container with the same
+ name already exists, the status code of 202 (Accepted) is displayed.
+
+### Displaying Objects of a Container
+
+You can list the objects of a container using GET command. You can use
+the following optional parameters with GET request to refine the
+results:
+
+ Parameter Description
+ ----------- --------------------------------------------------------------------------------------------------------------
+ limit Limits the number of results to at most *n* value.
+ marker Returns object names greater in value than the specified marker.
+ prefix Displays the results limited to object names beginning with the substring x.
+ path Returns the object names nested in the pseudo path.
+ format Specify either json or xml to return the respective serialized response.
+ delimiter Returns all the object names nested in the container.
+
+To display objects of a container
+
+- List objects of a specific container using the following command:
+
+<!-- -->
+
+ GET /<apiversion>/<account>/<container>[parm=value] HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+For example,
+
+ GET /v1/AUTH_test/images HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 200 Ok
+ Date: Wed, 13 Jul 2011 15:42:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+ Content-Length: 139
+
+ sample file.jpg
+ test-file.pdf
+ You and Me.pdf
+ Puddle of Mudd.mp3
+ Test Reports.doc
+
+To display objects of a container using cURL (for the above example),
+run the following command:
+
+ curl -v -X GET -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images -k
+
+### Displaying Container Metadata Information
+
+You can issue HEAD command to the storage service to view the number of
+objects in a container and the total bytes of all the objects stored in
+the container.
+
+- To display list of objects and storage used, run the following
+ command:
+
+ HEAD /<apiversion>/<account>/<container> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ HEAD /v1/AUTH_test/images HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 19:52:21 GMT
+ Server: Apache
+ X-Container-Object-Count: 8
+ X-Container-Bytes-Used: 472
+
+ To display list of objects and storage used in a container using
+ cURL (for the above example), run the following command:
+
+ curl -v -X HEAD -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images -k
+
+### Deleting Container
+
+You can use DELETE command to permanently delete containers. The
+container must be empty before it can be deleted.
+
+You can issue HEAD command to determine if it contains any objects.
+
+- To delete a container, run the following command:
+
+ DELETE /<apiversion>/<account>/<container>/ HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+
+ For example,
+
+ DELETE /v1/AUTH_test/pictures HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 17:52:21 GMT
+ Server: Apache
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To delete a container using cURL (for the above example), run the
+ following command:
+
+ curl -v -X DELETE -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures -k
+
+ The status code of 204 (No Content) indicates that you have
+ successfully deleted the container. If that container does not
+ exist, the status code 404 (Not Found) is displayed, and if the
+ container is not empty, the status code 409 (Conflict) is displayed.
+
+### Updating Container Metadata
+
+You can update the metadata of container using POST operation, metadata
+keys should be prefixed with 'x-container-meta'.
+
+- To update the metadata of the container, run the following command:
+
+ POST /<apiversion>/<account>/<container> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+ X-Container-Meta-<key>: <new value>
+ X-Container-Meta-<key>: <new value>
+
+ For example,
+
+ POST /v1/AUTH_test/images HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ X-Container-Meta-Zoo: Lion
+ X-Container-Meta-Home: Dog
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 20:52:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To update the metadata of the container using cURL (for the above
+ example), run the following command:
+
+ curl -v -X POST -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images -H 'X-Container-Meta-Zoo: Lion' -H 'X-Container-Meta-Home: Dog' -k
+
+ The status code of 204 (No Content) indicates the container's
+ metadata is updated successfully. If that container does not exist,
+ the status code 404 (Not Found) is displayed.
+
+### Setting ACLs on Container
+
+You can set the container access control list by using POST command on
+container with `x-container-read` and `x-container-write` keys.
+
+The ACL format is `[item[,item...]]`. Each item can be a group name to
+give access to or a referrer designation to grant or deny based on the
+HTTP Referer header.
+
+The referrer designation format is: `.r:[-]value`.
+
+The `.r` can also be `.ref`, `.referer`, or `.referrer`; though it will be
+shortened to `.r` for decreased character count usage. The value can be `*`
+to specify any referrer host is allowed access. The leading minus sign
+(-) indicates referrer hosts that should be denied access.
+
+Examples of valid ACLs:
+
+ .r:*
+ .r:*,bobs_account,sues_account:sue
+ bobs_account,sues_account:sue
+
+Examples of invalid ACLs:
+
+ .r:
+ .r:-
+
+By default, allowing read access via `.r` will not allow listing objects
+in the container but allows retrieving objects from the container. To
+turn on listings, use the `.rlistings` directive. Also, `.r`
+designations are not allowed in headers whose names include the word
+write.
+
+For example, to set all the objects access rights to "public" inside the
+container using cURL (for the above example), run the following command:
+
+ curl -v -X POST -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images
+ -H 'X-Container-Read: .r:*' -k
+
+Working with Objects
+--------------------
+
+An object represents the data and any metadata for the files stored in
+the system. Through the REST interface, metadata for an object can be
+included by adding custom HTTP headers to the request and the data
+payload as the request body. Object names should not exceed 1024 bytes
+after URL encoding.
+
+This section describes the list of operations you can perform at the
+object level of the URL.
+
+### Creating or Updating Object
+
+You can use PUT command to write or update an object's content and
+metadata.
+
+You can verify the data integrity by including an MD5 checksum for the
+object's data in the ETag header. ETag header is optional and can be
+used to ensure that the object's contents are stored successfully in the
+storage system.
+
+You can assign custom metadata to objects by including additional HTTP
+headers on the PUT request. The objects created with custom metadata via
+HTTP headers are identified with the `X-Object-Meta-` prefix.
+
+- To create or update an object, run the following command:
+
+ PUT /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+ ETag: da1e100dc9e7becc810986e37875ae38
+ Content-Length: 342909
+ X-Object-Meta-PIN: 2343
+
+ For example,
+
+ PUT /v1/AUTH_test/pictures/dog HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ ETag: da1e100dc9e7becc810986e37875ae38
+
+ HTTP/1.1 201 Created
+ Date: Wed, 13 Jul 2011 18:32:21 GMT
+ Server: Apache
+ ETag: da1e100dc9e7becc810986e37875ae38
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To create or update an object using cURL (for the above example),
+ run the following command:
+
+ curl -v -X PUT -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures/dog
+ -H 'Content-Length: 0' -k
+
+ The status code of 201 (Created) indicates that you have
+ successfully created or updated the object. If there is a missing
+ Content-Length or Content-Type header in the request, the status
+ code of 411 (Length Required) is displayed. (Optionally) If the MD5
+ checksum of the data written to the storage system does not match
+ the ETag value, the status code of 422 (Unprocessable Entity) is
+ displayed.
+
+#### Chunked Transfer Encoding
+
+You can upload data without knowing the size of the data to be uploaded.
+You can do this by specifying an HTTP header of Transfer-Encoding:
+chunked and without using a Content-Length header.
+
+You can use this feature while doing a DB dump, piping the output
+through gzip, and then piping the data directly into Object Storage
+without having to buffer the data to disk to compute the file size.
+
+- To create or update an object, run the following command:
+
+ PUT /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+ Transfer-Encoding: chunked
+ X-Object-Meta-PIN: 2343
+
+ For example,
+
+ PUT /v1/AUTH_test/pictures/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ Transfer-Encoding: chunked
+ X-Object-Meta-PIN: 2343
+ 19
+ A bunch of data broken up
+ D
+ into chunks.
+ 0
+
+### Copying Object
+
+You can copy object from one container to another or add a new object
+and then add reference to designate the source of the data from another
+container.
+
+**To copy object from one container to another**
+
+- To add a new object and designate the source of the data from
+ another container, run the following command:
+
+ COPY /<apiversion>/<account>/<container>/<sourceobject> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <authentication-token-key>
+ Destination: /<container>/<destinationobject>
+
+ For example,
+
+ COPY /v1/AUTH_test/images/dogs HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ Destination: /photos/cats
+
+ HTTP/1.1 201 Created
+ Date: Wed, 13 Jul 2011 18:32:21 GMT
+ Server: Apache
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To copy an object using cURL (for the above example), run the
+ following command:
+
+ curl -v -X COPY -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554' -H 'Destination: /photos/cats' -k https://example.storage.com:443/v1/AUTH_test/images/dogs
+
+ The status code of 201 (Created) indicates that you have
+ successfully copied the object. If there is a missing Content-Length
+ or Content-Type header in the request, the status code of 411
+ (Length Required) is displayed.
+
+ You can also use PUT command to copy object by using additional
+ header `X-Copy-From: container/obj`.
+
+- To use PUT command to copy an object, run the following command:
+
+ PUT /v1/AUTH_test/photos/cats HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ X-Copy-From: /images/dogs
+
+ HTTP/1.1 201 Created
+ Date: Wed, 13 Jul 2011 18:32:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To copy an object using cURL (for the above example), run the
+ following command:
+
+ curl -v -X PUT -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ -H 'X-Copy-From: /images/dogs' -k
+ https://example.storage.com:443/v1/AUTH_test/photos/cats
+
+ The status code of 201 (Created) indicates that you have
+ successfully copied the object.
+
+### Displaying Object Information
+
+You can issue GET command on an object to view the object data of the
+object.
+
+- To display the content of an object run the following command:
+
+ GET /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+
+ For example,
+
+ GET /v1/AUTH_test/images/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 200 Ok
+ Date: Wed, 13 Jul 2011 23:52:21 GMT
+ Server: Apache
+ Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
+ ETag: 8a964ee2a5e88be344f36c22562a6486
+ Content-Length: 534210
+ [.........]
+
+ To display the content of an object using cURL (for the above
+ example), run the following command:
+
+ curl -v -X GET -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images/cat -k
+
+ The status code of 200 (Ok) indicates the object's data is displayed
+ successfully. If that object does not exist, the status code 404
+ (Not Found) is displayed.
+
+### Displaying Object Metadata
+
+You can issue HEAD command on an object to view the object metadata and
+other standard HTTP headers. You must send only authorization token as
+header.
+
+- To display the metadata of the object, run the following command:
+
+<!-- -->
+
+ HEAD /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+
+For example,
+
+ HEAD /v1/AUTH_test/images/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 21:52:21 GMT
+ Server: Apache
+ Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
+ ETag: 8a964ee2a5e88be344f36c22562a6486
+ Content-Length: 512000
+ Content-Type: text/plain; charset=UTF-8
+ X-Object-Meta-House: Cat
+ X-Object-Meta-Zoo: Cat
+ X-Object-Meta-Home: Cat
+ X-Object-Meta-Park: Cat
+
+To display the metadata of the object using cURL (for the above
+example), run the following command:
+
+ curl -v -X HEAD -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images/cat -k
+
+The status code of 204 (No Content) indicates the object's metadata is
+displayed successfully. If that object does not exist, the status code
+404 (Not Found) is displayed.
+
+### Updating Object Metadata
+
+You can issue POST command on an object name only to set or overwrite
+arbitrary key metadata. You cannot change the object's other headers
+such as Content-Type, ETag and others using POST operation. The POST
+command will delete all the existing metadata and replace it with the
+new arbitrary key metadata.
+
+You must prefix **X-Object-Meta-** to the key names.
+
+- To update the metadata of an object, run the following command:
+
+ POST /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+ X-Object-Meta-<key>: <new value>
+ X-Object-Meta-<key>: <new value>
+
+ For example,
+
+ POST /v1/AUTH_test/images/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+ X-Object-Meta-Zoo: Lion
+ X-Object-Meta-Home: Dog
+
+ HTTP/1.1 202 Accepted
+ Date: Wed, 13 Jul 2011 22:52:21 GMT
+ Server: Apache
+ Content-Length: 0
+ Content-Type: text/plain; charset=UTF-8
+
+ To update the metadata of an object using cURL (for the above
+ example), run the following command:
+
+ curl -v -X POST -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/images/cat
+ -H 'X-Object-Meta-Zoo: Lion' -H 'X-Object-Meta-Home: Dog' -k
+
+ The status code of 202 (Accepted) indicates that you have
+ successfully updated the object's metadata. If that object does not
+ exist, the status code 404 (Not Found) is displayed.
+
+### Deleting Object
+
+You can use DELETE command to permanently delete the object.
+
+The DELETE command on an object will be processed immediately and any
+subsequent operations like GET, HEAD, POST, or DELETE on the object will
+display 404 (Not Found) error.
+
+- To delete an object, run the following command:
+
+ DELETE /<apiversion>/<account>/<container>/<object> HTTP/1.1
+ Host: <storage URL>
+ X-Auth-Token: <Authentication-token-key>
+
+ For example,
+
+ DELETE /v1/AUTH_test/pictures/cat HTTP/1.1
+ Host: example.storage.com
+ X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
+
+ HTTP/1.1 204 No Content
+ Date: Wed, 13 Jul 2011 20:52:21 GMT
+ Server: Apache
+ Content-Type: text/plain; charset=UTF-8
+
+ To delete an object using cURL (for the above example), run the
+ following command:
+
+ curl -v -X DELETE -H 'X-Auth-Token:
+ AUTH_tkde3ad38b087b49bbbac0494f7600a554'
+ https://example.storage.com:443/v1/AUTH_test/pictures/cat -k
+
+ The status code of 204 (No Content) indicates that you have
+ successfully deleted the object. If that object does not exist, the
+ status code 404 (Not Found) is displayed.
+
+ []: http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/
diff --git a/doc/admin-guide/en-US/markdown/admin_commandref.md b/doc/admin-guide/en-US/markdown/admin_commandref.md
new file mode 100644
index 000000000..4ff05f4ef
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_commandref.md
@@ -0,0 +1,180 @@
+Command Reference
+=================
+
+This section describes the available commands and includes the following
+sections:
+
+- gluster Command
+
+ Gluster Console Manager (command line interpreter)
+
+- glusterd Daemon
+
+ Gluster elastic volume management daemon
+
+gluster Command
+===============
+
+**NAME**
+
+gluster - Gluster Console Manager (command line interpreter)
+
+**SYNOPSIS**
+
+To run the program and display the gluster prompt:
+
+**gluster**
+
+To specify a command directly: gluster [COMMANDS] [OPTIONS]
+
+**DESCRIPTION**
+
+The Gluster Console Manager is a command line utility for elastic volume
+management. You can run the gluster command on any export server. The
+command enables administrators to perform cloud operations such as
+creating, expanding, shrinking, rebalancing, and migrating volumes
+without needing to schedule server downtime.
+
+**COMMANDS**
+
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Command Description
+ ---------------------------------------------------------------------------------------------------------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ **Volume**
+
+ volume info [all | VOLNAME] Displays information about all volumes, or the specified volume.
+
+ volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ... Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
+
+ volume delete VOLNAME Deletes the specified volume.
+
+ volume start VOLNAME Starts the specified volume.
+
+ volume stop VOLNAME [force] Stops the specified volume.
+
+ volume rename VOLNAME NEW-VOLNAME Renames the specified volume.
+
+ volume help Displays help for the volume command.
+
+ **Brick**
+
+ volume add-brick VOLNAME NEW-BRICK ... Adds the specified brick to the specified volume.
+
+ volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status Replaces the specified brick.
+
+ volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ... Removes the specified brick from the specified volume.
+
+ **Rebalance**
+
+ volume rebalance VOLNAME start Starts rebalancing the specified volume.
+
+ volume rebalance VOLNAME stop Stops rebalancing the specified volume.
+
+ volume rebalance VOLNAME status Displays the rebalance status of the specified volume.
+
+ **Log**
+
+ volume log filename VOLNAME [BRICK] DIRECTORY Sets the log directory for the corresponding volume/brick.
+
+ volume log rotate VOLNAME [BRICK] Rotates the log file for corresponding volume/brick.
+
+ volume log locate VOLNAME [BRICK] Locates the log file for corresponding volume/brick.
+
+ **Peer**
+
+ peer probe HOSTNAME Probes the specified peer.
+
+ peer detach HOSTNAME Detaches the specified peer.
+
+ peer status Displays the status of peers.
+
+ peer help Displays help for the peer command.
+
+ **Geo-replication**
+
+ volume geo-replication MASTER SLAVE start Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.
+
+ You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
+
+ volume geo-replication MASTER SLAVE stop Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.
+
+ You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
+
+ volume geo-replication MASTER SLAVE config [options] Configure geo-replication options between the hosts specified by MASTER and SLAVE.
+
+ gluster-command COMMAND The path where the gluster command is installed.
+
+ gluster-log-level LOGFILELEVEL The log level for gluster processes.
+
+ log-file LOGFILE The path to the geo-replication log file.
+
+ log-level LOGFILELEVEL The log level for geo-replication.
+
+ remote-gsyncd COMMAND The path where the gsyncd binary is installed on the remote machine.
+
+ ssh-command COMMAND The ssh command to use to connect to the remote machine (the default is ssh).
+
+ rsync-command COMMAND The rsync command to use for synchronizing the files (the default is rsync).
+
+ volume\_id= UID The command to delete the existing master UID for the intermediate/slave node.
+
+ timeout SECONDS The timeout period.
+
+ sync-jobs N The number of simultaneous files/directories that can be synchronized.
+
+ ignore-deletes If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.
+
+ **Other**
+
+ help Display the command options.
+
+ quit Exit the gluster command line interface.
+ ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+**FILES**
+
+/var/lib/glusterd/\*
+
+**SEE ALSO**
+
+fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
+glusterd(8)
+
+glusterd Daemon
+===============
+
+**NAME**
+
+glusterd - Gluster elastic volume management daemon
+
+**SYNOPSIS**
+
+glusterd [OPTION...]
+
+**DESCRIPTION**
+
+The glusterd daemon is used for elastic volume management. The daemon
+must be run on all export servers.
+
+**OPTIONS**
+
+ Option Description
+ ----------------------------------- ----------------------------------------------------------------------------------------------------------------
+ **Basic**
+ -l=LOGFILE, --log-file=LOGFILE Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).
+ -L=LOGLEVEL, --log-level=LOGLEVEL Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
+ --debug Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.
+ -N, --no-daemon Runs the program in the foreground.
+ **Miscellaneous**
+ -?, --help Displays this help.
+ --usage Displays a short usage message.
+ -V, --version Prints the program version.
+
+**FILES**
+
+/var/lib/glusterd/\*
+
+**SEE ALSO**
+
+fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
+gluster(8)
diff --git a/doc/admin-guide/en-US/markdown/admin_console.md b/doc/admin-guide/en-US/markdown/admin_console.md
new file mode 100644
index 000000000..9b69de02d
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_console.md
@@ -0,0 +1,51 @@
+Using the Gluster Console Manager – Command Line Utility
+========================================================
+
+The Gluster Console Manager is a single command line utility that
+simplifies configuration and management of your storage environment. The
+Gluster Console Manager is similar to the LVM (Logical Volume Manager)
+CLI or ZFS Command Line Interface, but across multiple storage servers.
+You can use the Gluster Console Manager online, while volumes are
+mounted and active. Gluster automatically synchronizes volume
+configuration information across all Gluster servers.
+
+Using the Gluster Console Manager, you can create new volumes, start
+volumes, and stop volumes, as required. You can also add bricks to
+volumes, remove bricks from existing volumes, as well as change
+translator settings, among other operations.
+
+You can also use the commands to create scripts for automation, as well
+as use the commands as an API to allow integration with third-party
+applications.
+
+**Running the Gluster Console Manager**
+
+You can run the Gluster Console Manager on any GlusterFS server either
+by invoking the commands or by running the Gluster CLI in interactive
+mode. You can also use the gluster command remotely using SSH.
+
+- To run commands directly:
+
+ `# gluster peer COMMAND`
+
+ For example:
+
+ ` # gluster peer status `
+
+- To run the Gluster Console Manager in interactive mode:
+
+ `# gluster`
+
+ You can execute gluster commands from the Console Manager prompt:
+
+ ` gluster> `
+
+ For example, to view the status of the peer server:
+
+ `# gluster`
+
+ `gluster> peer status`
+
+ Display the status of the peer.
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_directory_Quota.md b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
new file mode 100644
index 000000000..09c757781
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
@@ -0,0 +1,172 @@
+Managing Directory Quota
+========================
+
+Directory quotas in GlusterFS allow you to set limits on usage of disk
+space by directories or volumes. The storage administrators can control
+the disk space utilization at the directory and/or volume levels in
+GlusterFS by setting limits to allocatable disk space at any level in
+the volume and directory hierarchy. This is particularly useful in cloud
+deployments to facilitate a utility billing model.
+
+> **Note**
+>
+> For now, only hard limits are supported. Here, the limit cannot be
+> exceeded and attempts to use more disk space or inodes beyond the set
+> limit will be denied.
+
+System administrators can also monitor the resource utilization to limit
+the storage for the users depending on their role in the organization.
+
+You can set the quota at the following levels:
+
+- Directory level – limits the usage at the directory level
+
+- Volume level – limits the usage at the volume level
+
+> **Note**
+>
+> You can set the disk limit on the directory even if it is not created.
+> The disk limit is enforced immediately after creating that directory.
+> For more information on setting disk limit, see the "Setting or Replacing Disk Limit" section.
+
+Enabling Quota
+==============
+
+You must enable Quota to set disk limits.
+
+**To enable quota**
+
+- Enable the quota using the following command:
+
+ `# gluster volume quota VOLNAME enable`
+
+ For example, to enable quota on test-volume:
+
+ # gluster volume quota test-volume enable
+ Quota is enabled on /test-volume
+
+Disabling Quota
+===============
+
+You can disable Quota, if needed.
+
+**To disable quota:**
+
+- Disable the quota using the following command:
+
+ `# gluster volume quota VOLNAME disable`
+
+ For example, to disable quota translator on test-volume:
+
+ # gluster volume quota test-volume disable
+ Quota translator is disabled on /test-volume
+
+Setting or Replacing Disk Limit
+===============================
+
+You can create new directories in your storage environment and set the
+disk limit or set disk limit for the existing directories. The directory
+name should be relative to the volume with the export directory/mount
+being treated as "/".
+
+**To set or replace disk limit**
+
+- Set the disk limit using the following command:
+
+ `# gluster volume quota VOLNAME limit-usage /DIRECTORY LIMIT-VALUE`
+
+ For example, to set limit on data directory on test-volume where
+ data is a directory under the export directory:
+
+ # gluster volume quota test-volume limit-usage /data 10GB
+ Usage limit has been set on /data
+
+ > **Note**
+ >
+ > In a multi-level directory hierarchy, the strictest disk limit
+ > will be considered for enforcement.
+
+Displaying Disk Limit Information
+=================================
+
+You can display disk limit information on all the directories on which
+the limit is set.
+
+**To display disk limit information**
+
+- Display disk limit information of all the directories on which limit
+ is set, using the following command:
+
+ `# gluster volume quota VOLNAME list`
+
+ For example, to see the disk limits set on test-volume:
+
+ # gluster volume quota test-volume list
+
+
+ /Test/data 10 GB 6 GB
+ /Test/data1 10 GB 4 GB
+
+- Display disk limit information on a particular directory on which
+ limit is set, using the following command:
+
+ `# gluster volume quota VOLNAME list /DIRECTORY`
+
+ For example, to see the set limit on /data directory of test-volume:
+
+ # gluster volume quota test-volume list /data
+
+
+ /Test/data 10 GB 6 GB
+
+Updating Memory Cache Size
+==========================
+
+For performance reasons, quota caches the directory sizes on client. You
+can set timeout indicating the maximum valid duration of directory sizes
+in cache, from the time they are populated.
+
+For example: If there are multiple clients writing to a single
+directory, there are chances that some other client might write till the
+quota limit is exceeded. However, this new file-size may not get
+reflected in the client till size entry in cache has become stale
+because of timeout. If writes happen on this client during this
+duration, they are allowed even though they would lead to exceeding of
+quota-limits, since size in cache is not in sync with the actual size.
+When timeout happens, the size in cache is updated from servers and will
+be in sync and no further writes will be allowed. A timeout of zero will
+force fetching of directory sizes from server for every operation that
+modifies file data and will effectively disable directory size caching
+on client side.
+
+**To update the memory cache size**
+
+- Update the memory cache size using the following command:
+
+ `# gluster volume set VOLNAME features.quota-timeout VALUE`
+
+ For example, to update the memory cache size for every 5 seconds on
+ test-volume:
+
+ # gluster volume set test-volume features.quota-timeout 5
+ Set volume successful
+
+Removing Disk Limit
+===================
+
+You can remove a disk limit that has been set if you no longer want the quota.
+
+**To remove disk limit**
+
+- Remove disk limit set on a particular directory using the following
+ command:
+
+ `# gluster volume quota VOLNAME remove /DIRECTORY`
+
+ For example, to remove the disk limit on /data directory of
+ test-volume:
+
+ # gluster volume quota test-volume remove /data
+ Usage limit set on /data is removed
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_geo-replication.md b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
new file mode 100644
index 000000000..849957244
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
@@ -0,0 +1,738 @@
+Managing Geo-replication
+========================
+
+Geo-replication provides a continuous, asynchronous, and incremental
+replication service from one site to another over Local Area Networks
+(LANs), Wide Area Networks (WANs), and across the Internet.
+
+Geo-replication uses a master–slave model, whereby replication and
+mirroring occurs between the following partners:
+
+- Master – a GlusterFS volume
+
+- Slave – a slave which can be of the following types:
+
+ - A local directory which can be represented as file URL like
+ `file:///path/to/dir`. You can use shortened form, for example,
+ ` /path/to/dir`.
+
+ - A GlusterFS Volume - Slave volume can be either a local volume
+ like `gluster://localhost:volname` (shortened form - `:volname`)
+ or a volume served by different host like
+ `gluster://host:volname` (shortened form - `host:volname`).
+
+ > **Note**
+ >
+ > Both of the above types can be accessed remotely using SSH tunnel.
+ > To use SSH, add an SSH prefix to either a file URL or gluster type
+ > URL. For example, ` ssh://root@remote-host:/path/to/dir`
+ > (shortened form - `root@remote-host:/path/to/dir`) or
+ > `ssh://root@remote-host:gluster://localhost:volname` (shortened
+ > form - `root@remote-host::volname`).
+
+This section introduces Geo-replication, illustrates the various
+deployment scenarios, and explains how to configure the system to
+provide replication and mirroring in your environment.
+
+Replicated Volumes vs Geo-replication
+=====================================
+
+The following table lists the difference between replicated volumes and
+geo-replication:
+
+ Replicated Volumes Geo-replication
+ --------------------------------------------------------------------------------------- -----------------------------------------------------------------------------------------------------------------
+ Mirrors data across clusters Mirrors data across geographically distributed clusters
+ Provides high-availability Ensures backing up of data for disaster recovery
+ Synchronous replication (each and every file operation is sent across all the bricks) Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences)
+
+Preparing to Deploy Geo-replication
+===================================
+
+This section provides an overview of the Geo-replication deployment
+scenarios, describes how you can check the minimum system requirements,
+and explores common deployment scenarios.
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+Exploring Geo-replication Deployment Scenarios
+----------------------------------------------
+
+Geo-replication provides an incremental replication service over Local
+Area Networks (LANs), Wide Area Networks (WANs), and across the Internet.
+This section illustrates the most common deployment scenarios for
+Geo-replication, including the following:
+
+- Geo-replication over LAN
+
+- Geo-replication over WAN
+
+- Geo-replication over the Internet
+
+- Multi-site cascading Geo-replication
+
+**Geo-replication over LAN**
+
+You can configure Geo-replication to mirror data over a Local Area
+Network.
+
+![ Geo-replication over LAN ][]
+
+**Geo-replication over WAN**
+
+You can configure Geo-replication to replicate data over a Wide Area
+Network.
+
+![ Geo-replication over WAN ][]
+
+**Geo-replication over Internet**
+
+You can configure Geo-replication to mirror data over the Internet.
+
+![ Geo-replication over Internet ][]
+
+**Multi-site cascading Geo-replication**
+
+You can configure Geo-replication to mirror data in a cascading fashion
+across multiple sites.
+
+![ Multi-site cascading Geo-replication ][]
+
+Geo-replication Deployment Overview
+-----------------------------------
+
+Deploying Geo-replication involves the following steps:
+
+1. Verify that your environment matches the minimum system requirement.
+ For more information, see ?.
+
+2. Determine the appropriate deployment scenario. For more information,
+ see ?.
+
+3. Start Geo-replication on master and slave systems, as required. For
+ more information, see ?.
+
+Checking Geo-replication Minimum Requirements
+---------------------------------------------
+
+Before deploying GlusterFS Geo-replication, verify that your systems
+match the minimum requirements.
+
+The following table outlines the minimum requirements for both master
+and slave nodes within your environment:
+
+ Component Master Slave
+ ------------------------ --------------------------------------------------------------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Operating System GNU/Linux GNU/Linux
+ Filesystem GlusterFS 3.2 or higher GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively)
+ Python Python 2.4 (with ctypes external module), or Python 2.5 (or higher) Python 2.4 (with ctypes external module), or Python 2.5 (or higher)
+ Secure shell OpenSSH version 4.0 (or higher) SSH2-compliant daemon
+ Remote synchronization rsync 3.0.7 or higher rsync 3.0.7 or higher
+ FUSE GlusterFS supported versions GlusterFS supported versions
+
+Setting Up the Environment for Geo-replication
+----------------------------------------------
+
+**Time Synchronization**
+
+- On bricks of a geo-replication master volume, all the servers' time
+ must be uniform. You are recommended to set up NTP (Network Time
+ Protocol) service to keep the bricks synchronized in time and avoid
+ out-of-time sync effect.
+
+ For example: In a Replicated volume where brick1 of the master is at
+ 12.20 hrs and brick 2 of the master is at 12.10 hrs with 10 minutes
+ time lag, all the changes in brick2 between this period may go
+ unnoticed during synchronization of files with Slave.
+
+ For more information on setting up NTP, see [][].
+
+**To setup Geo-replication for SSH**
+
+Password-less login has to be set up between the host machine (where
+geo-replication Start command will be issued) and the remote machine
+(where slave process should be launched through SSH).
+
+1. On the node where geo-replication sessions are to be set up, run the
+ following command:
+
+ `# ssh-keygen -f /var/lib/glusterd/geo-replication/secret.pem`
+
+ Press Enter twice to avoid passphrase.
+
+2. Run the following command on master for all the slave hosts:
+
+ `# ssh-copy-id -i /var/lib/glusterd/geo-replication/secret.pem.pub @`
+
+Setting Up the Environment for a Secure Geo-replication Slave
+-------------------------------------------------------------
+
+You can configure a secure slave using SSH so that master is granted a
+restricted access. With GlusterFS, you need not specify configuration
+parameters regarding the slave on the master-side configuration. For
+example, the master does not require the location of the rsync program
+on slave but the slave must ensure that rsync is in the PATH of the user
+which the master connects using SSH. The only information that master
+and slave have to negotiate are the slave-side user account, slave's
+resources that master uses as slave resources, and the master's public
+key. Secure access to the slave can be established using the following
+options:
+
+- Restricting Remote Command Execution
+
+- Using `Mountbroker` for Slaves
+
+- Using IP based Access Control
+
+**Backward Compatibility**
+
+Your existing Geo-replication environment will work with GlusterFS,
+except for the following:
+
+- The process of secure reconfiguration affects only the glusterfs
+ instance on slave. The changes are transparent to master with the
+ exception that you may have to change the SSH target to an
+ unprivileged account on slave.
+
+- The following are some exceptions where this might not work:
+
+ - Geo-replication URLs which specify the slave resource when
+ configuring master will include the following special
+ characters: space, \*, ?, [;
+
+ - Slave must have a running instance of glusterd, even if there is
+ no gluster volume among the mounted slave resources (that is,
+ file tree slaves are used exclusively).
+
+### Restricting Remote Command Execution
+
+If you restrict remote command execution, then the Slave audits commands
+coming from the master and the commands related to the given
+geo-replication session are allowed. The Slave also provides access only
+to the files within the slave resource which can be read or manipulated
+by the Master.
+
+To restrict remote command execution:
+
+1. Identify the location of the gsyncd helper utility on Slave. This
+ utility is installed in `PREFIX/libexec/glusterfs/gsyncd`, where
+ PREFIX is a compile-time parameter of glusterfs. For example,
+ `--prefix=PREFIX` to the configure script with the following common
+ values: `/usr`, `/usr/local`, and `/opt/glusterfs/glusterfs_version`.
+
+2. Ensure that commands invoked from master to slave are passed through the
+ slave's gsyncd utility.
+
+ You can use either of the following two options:
+
+ - Set gsyncd with an absolute path as the shell for the account
+ which the master connects through SSH. If you need to use a
+ privileged account, then set it up by creating a new user with
+ UID 0.
+
+ - Setup key authentication with command enforcement to gsyncd. You
+ must prefix the copy of master's public key in the Slave
+ account's `authorized_keys` file with the following command:
+
+ `command=<path to gsyncd>`.
+
+ For example,
+ `command="PREFIX/glusterfs/gsyncd" ssh-rsa AAAAB3Nza....`
+
+### Using Mountbroker for Slaves
+
+`mountbroker` is a new service of glusterd. This service allows an
+unprivileged process to own a GlusterFS mount by registering a label
+(and DSL (Domain-specific language) options) with glusterd through a
+glusterd volfile. Using CLI, you can send a mount request to glusterd to
+receive an alias (symlink) of the mounted volume.
+
+A request from the agent , the unprivileged slave agents use the
+mountbroker service of glusterd to set up an auxiliary gluster mount for
+the agent in a special environment which ensures that the agent is only
+allowed to access with special parameters that provide administrative
+level access to the particular volume.
+
+**To setup an auxiliary gluster mount for the agent**:
+
+1. Create a new group. For example, `geogroup`.
+
+2. Create an unprivileged account. For example, `geoaccount`. Make it a
+ member of `geogroup`.
+
+3. Create a new directory owned by root and with permissions *0711.*
+ For example, create a mountbroker-root directory
+ `/var/mountbroker-root`.
+
+4. Add the following options to the glusterd volfile, assuming the name
+ of the slave gluster volume as `slavevol`:
+
+ `option mountbroker-root /var/mountbroker-root `
+
+ `option mountbroker-geo-replication.geoaccount slavevol`
+
+ `option geo-replication-log-group geogroup`
+
+ If you are unable to locate the glusterd volfile at
+ `/etc/glusterfs/glusterd.vol`, you can create a volfile containing
+ both the default configuration and the above options and place it at
+ `/etc/glusterfs/`.
+
+ A sample glusterd volfile along with default options:
+
+ volume management
+ type mgmt/glusterd
+ option working-directory /var/lib/glusterd
+ option transport-type socket,rdma
+ option transport.socket.keepalive-time 10
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+
+ option mountbroker-root /var/mountbroker-root
+ option mountbroker-geo-replication.geoaccount slavevol
+ option geo-replication-log-group geogroup
+ end-volume
+
+ If you host multiple slave volumes on Slave, you can repeat step 2.
+ for each of them and add the following options to the `volfile`:
+
+ option mountbroker-geo-replication.geoaccount2 slavevol2
+ option mountbroker-geo-replication.geoaccount3 slavevol3
+
+5. Setup Master to access Slave as `geoaccount@Slave`.
+
+ You can add multiple slave volumes within the same account
+ (geoaccount) by providing comma-separated list (without spaces) as
+ the argument of `mountbroker-geo-replication.geogroup`. You can also
+ have multiple options of the form `mountbroker-geo-replication.*`.
+ It is recommended to use one service account per Master machine. For
+ example, if there are multiple slave volumes on Slave for the master
+ machines Master1, Master2, and Master3, then create a dedicated
+ service user on Slave for them by repeating Step 2. for each (like
+ geogroup1, geogroup2, and geogroup3), and then add the following
+ corresponding options to the volfile:
+
+ `option mountbroker-geo-replication.geoaccount1 slavevol11,slavevol12,slavevol13`
+
+ `option mountbroker-geo-replication.geoaccount2 slavevol21,slavevol22`
+
+ `option mountbroker-geo-replication.geoaccount3 slavevol31`
+
+ Now set up Master1 to ssh to geoaccount1@Slave, etc.
+
+ You must restart glusterd after making changes in the configuration
+ to effect the updates.
+
+### Using IP based Access Control
+
+You can use IP based access control method to provide access control for
+the slave resources using IP address. You can use this method for both Slave
+and file tree slaves, but in this section, we are focusing on file tree
+slaves using this method.
+
+To set access control based on IP address for file tree slaves:
+
+1. Set a general restriction for accessibility of file tree resources:
+
+ `# gluster volume geo-replication '/*' config allow-network ::1,127.0.0.1 `
+
+ This will refuse all requests for spawning slave agents except for
+ requests initiated locally.
+
+2. If you want to lease the file tree at `/data/slave-tree` to Master,
+ enter the following command:
+
+ `# gluster volume geo-replication /data/slave-tree config allow-network MasterIP`
+
+ `MasterIP` is the IP address of Master. The slave agent spawn
+ request from master will be accepted if it is executed at
+ `/data/slave-tree`.
+
+If the Master side network configuration does not enable the Slave to
+recognize the exact IP address of Master, you can use CIDR notation to
+specify a subnet instead of a single IP address as MasterIP or even
+comma-separated lists of CIDR subnets.
+
+If you want to extend IP based access control to gluster slaves, use the
+following command:
+
+`# gluster volume geo-replication '*' config allow-network ::1,127.0.0.1`
+
+Starting Geo-replication
+========================
+
+This section describes how to configure and start Gluster
+Geo-replication in your storage environment, and verify that it is
+functioning correctly.
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+- ?
+
+Starting Geo-replication
+------------------------
+
+To start Gluster Geo-replication
+
+- Start geo-replication between the hosts using the following command:
+
+ `# gluster volume geo-replication start`
+
+ For example:
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir start
+ Starting geo-replication session between Volume1
+ example.com:/data/remote_dir has been successful
+
+ > **Note**
+ >
+ > You may need to configure the service before starting Gluster
+ > Geo-replication. For more information, see ?.
+
+Verifying Successful Deployment
+-------------------------------
+
+You can use the gluster command to verify the status of Gluster
+Geo-replication in your environment.
+
+**To verify the status of Gluster Geo-replication**
+
+- Verify the status by issuing the following command on host:
+
+ `# gluster volume geo-replication status`
+
+ For example:
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir status`
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir Starting....
+
+Displaying Geo-replication Status Information
+---------------------------------------------
+
+You can display status information about a specific geo-replication
+master session, or a particular master-slave session, or all
+geo-replication sessions, as needed.
+
+**To display geo-replication status information**
+
+- Display information of all geo-replication sessions using the
+ following command:
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir Starting....
+
+- Display information of a particular master slave session using the
+ following command:
+
+ `# gluster volume geo-replication status`
+
+ For example, to display information of Volume1 and
+ example.com:/data/remote\_dir
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir status`
+
+ The status of the geo-replication between Volume1 and
+ example.com:/data/remote\_dir is displayed.
+
+- Display information of all geo-replication sessions belonging to a
+ master
+
+ `# gluster volume geo-replication MASTER status`
+
+ For example, to display information of Volume1
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 ssh://example.com:gluster://127.0.0.1:remote_volume OK
+
+ Volume1 ssh://example.com:file:///data/remote_dir OK
+
+ The status of a session could be one of the following four:
+
+- **Starting**: This is the initial phase of the Geo-replication
+ session; it remains in this state for a minute, to make sure no
+ abnormalities are present.
+
+- **OK**: The geo-replication session is in a stable state.
+
+- **Faulty**: The geo-replication session has witnessed some
+ abnormality and the situation has to be investigated further. For
+ further information, see ? section.
+
+- **Corrupt**: The monitor thread which is monitoring the
+ geo-replication session has died. This situation should not occur
+ normally; if it persists, contact [Red Hat Support][1].
+
+Configuring Geo-replication
+---------------------------
+
+To configure Gluster Geo-replication
+
+- Use the following command at the Gluster command line:
+
+ `# gluster volume geo-replication config [options]`
+
+ For more information about the options, see ?.
+
+ For example:
+
+ To view list of all option/value pair, use the following command:
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir config`
+
+Stopping Geo-replication
+------------------------
+
+You can use the gluster command to stop Gluster Geo-replication (syncing
+of data from Master to Slave) in your environment.
+
+**To stop Gluster Geo-replication**
+
+- Stop geo-replication between the hosts using the following command:
+
+ `# gluster volume geo-replication stop `
+
+ For example:
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir stop
+ Stopping geo-replication session between Volume1 and
+ example.com:/data/remote_dir has been successful
+
+ See ? for more information about the gluster command.
+
+Restoring Data from the Slave
+=============================
+
+You can restore data from the slave to the master volume, whenever the
+master volume becomes faulty for reasons like hardware failure.
+
+The example in this section assumes that you are using the Master Volume
+(Volume1) with the following configuration:
+
+ machine1# gluster volume info
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 2
+ Transport-type: tcp
+ Bricks:
+ Brick1: machine1:/export/dir16
+ Brick2: machine2:/export/dir16
+ Options Reconfigured:
+ geo-replication.indexing: on
+
+The data is syncing from master volume (Volume1) to slave directory
+(example.com:/data/remote\_dir). To view the status of this
+geo-replication session run the following command on Master:
+
+ # gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir OK
+
+**Before Failure**
+
+Assume that the Master volume had 100 files and was mounted at
+/mnt/gluster on one of the client machines (client). Run the following
+command on Client machine to view the list of files:
+
+ client# ls /mnt/gluster | wc -l
+ 100
+
+The slave directory (example.com) will have same data as in the master
+volume and same can be viewed by running the following command on slave:
+
+ example.com# ls /data/remote_dir/ | wc -l
+ 100
+
+**After Failure**
+
+If one of the bricks (machine2) fails, then the status of
+Geo-replication session is changed from "OK" to "Faulty". To view the
+status of this geo-replication session run the following command on
+Master:
+
+ # gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
+
+ MASTER SLAVE STATUS
+ ______ ______________________________ ____________
+ Volume1 root@example.com:/data/remote_dir Faulty
+
+Machine2 has failed and now you can see a discrepancy in the number of files
+between master and slave. A few files will be missing from the master
+volume but they will be available only on slave as shown below.
+
+Run the following command on Client:
+
+ client# ls /mnt/gluster | wc -l
+ 52
+
+Run the following command on slave (example.com):
+
+ example.com# ls /data/remote_dir/ | wc -l
+ 100
+
+**To restore data from the slave machine**
+
+1. Stop all Master's geo-replication sessions using the following
+ command:
+
+ `# gluster volume geo-replication stop`
+
+ For example:
+
+ machine1# gluster volume geo-replication Volume1
+ example.com:/data/remote_dir stop
+
+ Stopping geo-replication session between Volume1 &
+ example.com:/data/remote_dir has been successful
+
+ > **Note**
+ >
+ > Repeat `# gluster volume geo-replication stop` command on all
+ > active geo-replication sessions of master volume.
+
+2. Replace the faulty brick in the master by using the following
+ command:
+
+ `# gluster volume replace-brick start`
+
+ For example:
+
+ machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 start
+ Replace-brick started successfully
+
+3. Commit the migration of data using the following command:
+
+ `# gluster volume replace-brick commit force `
+
+ For example:
+
+ machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 commit force
+ Replace-brick commit successful
+
+4. Verify the migration of brick by viewing the volume info using the
+ following command:
+
+ `# gluster volume info `
+
+ For example:
+
+ machine1# gluster volume info
+ Volume Name: Volume1
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 2
+ Transport-type: tcp
+ Bricks:
+ Brick1: machine1:/export/dir16
+ Brick2: machine3:/export/dir16
+ Options Reconfigured:
+ geo-replication.indexing: on
+
+5. Run rsync command manually to sync data from slave to master
+ volume's client (mount point).
+
+ For example:
+
+ `example.com# rsync -PavhS --xattrs --ignore-existing /data/remote_dir/ client:/mnt/gluster`
+
+ Verify that the data is synced by using the following command:
+
+ On master volume, run the following command:
+
+ Client # ls | wc -l
+ 100
+
+ On the Slave run the following command:
+
+ example.com# ls /data/remote_dir/ | wc -l
+ 100
+
+ Now Master volume and Slave directory are synced.
+
+6. Restart geo-replication session from master to slave using the
+ following command:
+
+ `# gluster volume geo-replication start `
+
+ For example:
+
+ machine1# gluster volume geo-replication Volume1
+ example.com:/data/remote_dir start
+ Starting geo-replication session between Volume1 &
+ example.com:/data/remote_dir has been successful
+
+Best Practices
+==============
+
+**Manually Setting Time**
+
+If you have to change the time on your bricks manually, then you must
+set uniform time on all bricks. This avoids the out-of-time sync issue
+described in ?. Setting time backward corrupts the geo-replication
+index, so the recommended way to set the time manually is:
+
+1. Stop geo-replication between the master and slave using the
+ following command:
+
+ `# gluster volume geo-replication stop`
+
+2. Stop the geo-replication indexing using the following command:
+
+ `# gluster volume set geo-replication.indexing off`
+
+3. Set uniform time on all bricks.
+
+4. Restart your geo-replication sessions by using the following
+ command:
+
+ `# gluster volume geo-replication start `
+
+**Running Geo-replication commands in one system**
+
+It is advisable to run the geo-replication commands in one of the bricks
+in the trusted storage pool. This is because, the log files for the
+geo-replication session would be stored in the \*Server\* where the
+Geo-replication start is initiated. Hence it would be easier to locate
+the log-files when required.
+
+**Isolation**
+
+Geo-replication slave operation is not sandboxed as of now and is run as
+a privileged service. So, for security reasons, it is advised to
+create a sandbox environment (dedicated machine / dedicated virtual
+machine / chroot/container type solution) by the administrator to run
+the geo-replication slave in it. Enhancement in this regard will be
+available in follow-up minor release.
+
+ [ Geo-replication over LAN ]: images/Geo-Rep_LAN.png
+ [ Geo-replication over WAN ]: images/Geo-Rep_WAN.png
+ [ Geo-replication over Internet ]: images/Geo-Rep03_Internet.png
+ [ Multi-site cascading Geo-replication ]: images/Geo-Rep04_Cascading.png
+ []: http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html
+ [1]: http://www.redhat.com/support/
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_snapshots.md b/doc/admin-guide/en-US/markdown/admin_managing_snapshots.md
new file mode 100644
index 000000000..e76ee9151
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_managing_snapshots.md
@@ -0,0 +1,66 @@
+Managing GlusterFS Volume Snapshots
+==========================
+
+This section describes how to perform common GlusterFS volume snapshot
+management operations
+
+Pre-requisites
+=====================
+
+GlusterFS volume snapshot feature is based on thinly provisioned LVM snapshot.
+To make use of snapshot feature GlusterFS volume should fulfill following
+pre-requisites:
+
+* Each brick should be on an independent thinly provisioned LVM.
+* Brick LVM should not contain any data other than the brick.
+* None of the bricks should be on a thick LVM.
+
+
+Snapshot Management
+=====================
+
+
+**Snapshot creation**
+
+*gluster snapshot create \<vol-name\> \[-n \<snap-name\>\] \[-d \<description\>\]*
+
+This command will create a snapshot of a GlusterFS volume. User can provide a snap-name and a description to identify the snap. The description cannot be more than 1024 characters.
+
+Volume should be present and it should be in started state.
+
+**Restoring snaps**
+
+*gluster snapshot restore -v \<vol-name\> \<snap-name\>*
+
+This command restores an already taken snapshot of a GlusterFS volume. Snapshot restore is an offline activity therefore if the volume is online then the restore operation will fail.
+
+Once the snapshot is restored it will be deleted from the list of snapshots.
+
+**Deleting snaps**
+
+*gluster snapshot delete \<volname\> -s \<snap-name\> \[force\]*
+
+This command will delete the specified snapshot.
+
+**Listing of available snaps**
+
+*gluster snapshot list \[\<volname\> \[-s \<snap-name\>\]\]*
+
+This command is used to list all snapshots taken, or for a specified volume. If snap-name is provided then it will list the details of that snap.
+
+**Configuring the snapshot behavior**
+
+*gluster snapshot config \[\<vol-name | all\>\]*
+
+This command will display existing config values for a volume. If volume name is not provided then config values of all the volumes are displayed. System config is displayed irrespective of volume name.
+
+*gluster snapshot config \<vol-name | all\> \[\<snap-max-hard-limit\> \<count\>\] \[\<snap-max-soft-limit\> \<percentage\>\]*
+
+The above command can be used to change the existing config values. If vol-name is provided then config value of that volume is changed, else it will set/change the system limit.
+
+The system limit is the default value of the config for all the volumes. A volume-specific limit cannot cross the system limit. If a volume-specific limit is not provided, then the system limit will be considered.
+
+If any of these limits is decreased and the current snap count of the system/volume is more than the limit, then the command will fail. If the user still wants to decrease the limit, then the force option should be used.
+
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
new file mode 100644
index 000000000..6c06e27a0
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
@@ -0,0 +1,710 @@
+Managing GlusterFS Volumes
+==========================
+
+This section describes how to perform common GlusterFS management
+operations, including the following:
+
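+- Tuning Volume Options
+
+- Expanding Volumes
+
+- Shrinking Volumes
+
+- Migrating Volumes
+
+- Rebalancing Volumes
+
+- Stopping Volumes
+
+- Deleting Volumes
+
+- Triggering Self-Heal on Replicate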
+
+Tuning Volume Options
+=====================
+
+You can tune volume options, as needed, while the cluster is online and
+available.
+
+> **Note**
+>
+> Red Hat recommends you to set server.allow-insecure option to ON if
+> there are too many bricks in each volume or if there are too many
+> services which have already utilized all the privileged ports in the
+> system. Turning this option ON allows ports to accept/reject messages
+> from insecure ports. So, use this option only if your deployment
+> requires it.
+
+To tune volume options
+
+- Tune volume options using the following command:
+
+ `# gluster volume set <VOLNAME> <OPTION> <PARAMETER>`
+
+ For example, to specify the performance cache size for test-volume:
+
+ # gluster volume set test-volume performance.cache-size 256MB
+ Set volume successful
+
+ The following table lists the Volume options along with its
+ description and default value:
+
+ > **Note**
+ >
+ > The default options given here are subject to modification at any
+ > given time and may not be the same for all versions.
+
+ -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Option Description Default Value Available Options
+ -------------------------------------- ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ---------------------------------- ---------------------------------------------------------------------------------------
+ auth.allow IP addresses of the clients which should be allowed to access the volume. \* (allow all) Valid IP address which includes wild card patterns including \*, such as 192.168.1.\*
+
+ auth.reject IP addresses of the clients which should be denied to access the volume. NONE (reject none) Valid IP address which includes wild card patterns including \*, such as 192.168.2.\*
+
+ client.grace-timeout Specifies the duration for the lock state to be maintained on the client after a network disconnection. 10 10 - 1800 secs
+
+ cluster.self-heal-window-size Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. 16 0 - 1025 blocks
+
+ cluster.data-self-heal-algorithm Specifies the type of self-heal. If you set the option as "full", the entire file is copied from source to destinations. If the option is set to "diff" the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the "diff" algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than "diff" which has to read checksums and then read and write. reset full | diff | reset
+
+ cluster.min-free-disk Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. 10% Percentage of required minimum free disk space
+
+ cluster.stripe-block-size Specifies the size of the stripe unit that will be read from or written to. 128 KB (for all files) size in bytes
+
+ cluster.self-heal-daemon Allows you to turn-off proactive self-heal on replicated volumes. on On | Off
+
+ cluster.ensure-durability This option makes sure the data/metadata is durable across abrupt shutdown of the brick. on On | Off
+
+ diagnostics.brick-log-level Changes the log-level of the bricks. INFO DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
+
+ diagnostics.client-log-level Changes the log-level of the clients. INFO DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
+
+ diagnostics.latency-measurement Statistics related to the latency of each operation would be tracked. off On | Off
+
+ diagnostics.dump-fd-stats Statistics related to file-operations would be tracked. off On | Off
+
+ features.read-only Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it. off On | Off
+
+ features.lock-heal Enables self-healing of locks when the network disconnects. on On | Off
+
+ features.quota-timeout For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. 0 0 - 3600 secs
+
+ geo-replication.indexing Use this option to automatically sync the changes in the filesystem from Master to Slave. off On | Off
+
+ network.frame-timeout The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. 1800 (30 mins) 1800 secs
+
+ network.ping-timeout The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. 42 Secs 42 Secs
+ This reconnect is a very expensive operation and should be avoided.
+
+ nfs.enable-ino32 For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. Applications that will benefit are those that were either: off On | Off
+ \* Built 32-bit and run on 32-bit machines.
+
+ \* Built 32-bit on 64-bit systems.
+
+ \* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.
+
+ Either of the conditions above can lead to application on Linux NFS clients failing with "Invalid argument" or "Value too large for defined data type" errors.
+
+ nfs.volume-access Set the access type for the specified sub-volume. read-write read-write|read-only
+
+ nfs.trusted-write If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. off On | Off
+ In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.
+
+ nfs.trusted-sync All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. off On | Off
+
+ nfs.export-dir This option can be used to export specified comma separated subdirectories in the volume. The path must be an absolute path. Along with the path, an allowed list of IPs/hostnames can be associated with each subdirectory. If provided, connections will be allowed only from these IPs. Format: \<dir\>[(hostspec[|hostspec|...])][,...]. Where hostspec can be an IP address, hostname or an IP range in CIDR notation. **Note**: Care must be taken while configuring this option as invalid entries and/or unreachable DNS servers can introduce unwanted delay in all the mount calls. No sub directory exported. Absolute path with allowed list of IP/hostname.
+
+ nfs.export-volumes Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports. on On | Off
+
+ nfs.rpc-auth-unix Enable/Disable the AUTH\_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required. on On | Off
+
+ nfs.rpc-auth-null Enable/Disable the AUTH\_NULL authentication type. It is not recommended to change the default value for this option. on On | Off
+
+ nfs.rpc-auth-allow\<IP- Addresses\> Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes. Reject All IP address or Host name
+
+ nfs.rpc-auth-reject IP- Addresses Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes. Reject All IP address or Host name
+
+ nfs.ports-insecure Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. off On | Off
+
+ nfs.addr-namelookup Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.\* filters. on On | Off
+
+ nfs.register-with- portmap For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. on On | Off
+
+ nfs.port \<PORT- NUMBER\> Use this option on systems that need Gluster NFS to be associated with a non-default port number. 38465- 38467
+
+ nfs.disable Turn-off volume being exported by NFS off On | Off
+
+ performance.write-behind-window-size Size of the per-file write-behind buffer. 1 MB Write-behind cache size
+
+ performance.io-thread-count The number of threads in IO threads translator. 16 0 - 65
+
+ performance.flush-behind If this option is set ON, it instructs the write-behind translator to perform flush in the background, by returning success (or any errors, if any of the previous writes failed) to the application even before the flush is sent to the backend filesystem. On On | Off
+
+ performance.cache-max-file-size Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. 2 \^ 64 -1 bytes size in bytes
+
+ performance.cache-min-file-size Sets the minimum file size cached by the io-cache translator. Values same as "max" above. 0B size in bytes
+
+ performance.cache-refresh-timeout The cached data for a file will be retained till 'cache-refresh-timeout' seconds, after which data re-validation is performed. 1 sec 0 - 61
+
+ performance.cache-size Size of the read cache. 32 MB size in bytes
+
+ server.allow-insecure Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. on On | Off
+
+ server.grace-timeout Specifies the duration for the lock state to be maintained on the server after a network disconnection. 10 10 - 1800 secs
+
+ server.statedump-path Location of the state dump file. /tmp directory of the brick New directory path
+
+ storage.health-check-interval Number of seconds between health-checks done on the filesystem that is used for the brick(s). Defaults to 30 seconds, set to 0 to disable. 30 secs 0 (disable) or interval in secs
+ -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+ You can view the changed volume options using
+ the `# gluster volume info` command. For more information, see ?.
+
+Expanding Volumes
+=================
+
+You can expand volumes, as needed, while the cluster is online and
+available. For example, you might want to add a brick to a distributed
+volume, thereby increasing the distribution and adding to the capacity
+of the GlusterFS volume.
+
+Similarly, you might want to add a group of bricks to a distributed
+replicated volume, increasing the capacity of the GlusterFS volume.
+
+> **Note**
+>
+> When expanding distributed replicated and distributed striped volumes,
+> you need to add a number of bricks that is a multiple of the replica
+> or stripe count. For example, to expand a distributed replicated
+> volume with a replica count of 2, you need to add bricks in multiples
+> of 2 (such as 4, 6, 8, etc.).
+
+**To expand a volume**
+
+1. On the first server in the cluster, probe the server to which you
+ want to add the new brick using the following command:
+
+ `# gluster peer probe <SERVER>`
+
+ For example:
+
+ # gluster peer probe server4
+ Probe successful
+
+2. Add the brick using the following command:
+
+ `# gluster volume add-brick <VOLNAME> <NEW-BRICK>`
+
+ For example:
+
+ # gluster volume add-brick test-volume server4:/exp4
+ Add Brick successful
+
+3. Check the volume information using the following command:
+
+ `# gluster volume info `
+
+ The command displays information similar to the following:
+
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 4
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+4. Rebalance the volume to ensure that all files are distributed to the
+ new brick.
+
+ You can use the rebalance command as described in Rebalancing Volumes.
+
+Shrinking Volumes
+=================
+
+You can shrink volumes, as needed, while the cluster is online and
+available. For example, you might need to remove a brick that has become
+inaccessible in a distributed volume due to hardware or network failure.
+
+> **Note**
+>
+> Data residing on the brick that you are removing will no longer be
+> accessible at the Gluster mount point. Note however that only the
+> configuration information is removed - you can continue to access the
+> data directly from the brick, as necessary.
+
+When shrinking distributed replicated and distributed striped volumes,
+you need to remove a number of bricks that is a multiple of the replica
+or stripe count. For example, to shrink a distributed striped volume
+with a stripe count of 2, you need to remove bricks in multiples of 2
+(such as 4, 6, 8, etc.). In addition, the bricks you are trying to
+remove must be from the same sub-volume (the same replica or stripe
+set).
+
+**To shrink a volume**
+
+1. Remove the brick using the following command:
+
+ `# gluster volume remove-brick <VOLNAME> <BRICK> start`
+
+ For example, to remove server2:/exp2:
+
+ # gluster volume remove-brick test-volume server2:/exp2 start
+
+ Removing brick(s) can result in data loss. Do you want to Continue? (y/n)
+
+2. Enter "y" to confirm the operation. The command displays the
+ following message indicating that the remove brick operation is
+ successfully started:
+
+ Remove Brick successful
+
+3. (Optional) View the status of the remove brick operation using the
+ following command:
+
+ `# gluster volume remove-brick <VOLNAME> <BRICK> status`
+
+ For example, to view the status of remove brick operation on
+ server2:/exp2 brick:
+
+ # gluster volume remove-brick test-volume server2:/exp2 status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 34 340 162 in progress
+
+4. Check the volume information using the following command:
+
+ `# gluster volume info `
+
+ The command displays information similar to the following:
+
+ # gluster volume info
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 3
+ Bricks:
+ Brick1: server1:/exp1
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+5. Rebalance the volume to ensure that all files are evenly distributed
+ across the remaining bricks.
+
+ You can use the rebalance command as described in Rebalancing Volumes.
+
+Migrating Volumes
+=================
+
+You can migrate the data from one brick to another, as needed, while the
+cluster is online and available.
+
+**To migrate a volume**
+
+1. Make sure the new brick, server5 in this example, is successfully
+ added to the cluster.
+
+ For more information, see ?.
+
+2. Migrate the data from one brick to another using the following
+ command:
+
+ `# gluster volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> start`
+
+ For example, to migrate the data in server3:/exp3 to server5:/exp5
+ in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 start
+ Replace brick start operation successful
+
+ > **Note**
+ >
+ > You need to have the FUSE package installed on the server on which
+ > you are running the replace-brick command for the command to work.
+
+3. To pause the migration operation, if needed, use the following
+ command:
+
+ `# gluster volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> pause`
+
+ For example, to pause the data migration from server3:/exp3 to
+ server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 pause
+ Replace brick pause operation successful
+
+4. To abort the migration operation, if needed, use the following
+ command:
+
+ `# gluster volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> abort`
+
+ For example, to abort the data migration from server3:/exp3 to
+ server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 abort
+ Replace brick abort operation successful
+
+5. Check the status of the migration operation using the following
+ command:
+
+ `# gluster volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> status`
+
+ For example, to check the data migration status from server3:/exp3
+ to server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 status
+ Current File = /usr/src/linux-headers-2.6.31-14/block/Makefile
+ Number of files migrated = 10567
+ Migration complete
+
+ The status command shows the current file being migrated along with
+ the current total number of files migrated. After completion of
+ migration, it displays Migration complete.
+
+6. Commit the migration of data from one brick to another using the
+ following command:
+
+ `# gluster volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> commit`
+
+ For example, to commit the data migration from server3:/exp3 to
+ server5:/exp5 in test-volume:
+
+ # gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 commit
+ replace-brick commit successful
+
+7. Verify the migration of brick by viewing the volume info using the
+ following command:
+
+ `# gluster volume info `
+
+ For example, to check the volume information of new brick
+ server5:/exp5 in test-volume:
+
+ # gluster volume info test-volume
+ Volume Name: testvolume
+ Type: Replicate
+ Status: Started
+ Number of Bricks: 4
+ Transport-type: tcp
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server4:/exp4
+ Brick4: server5:/exp5
+
+ The new volume details are displayed.
+
+ The new volume details are displayed.
+
+ In the above example, there were previously bricks 1, 2, 3, and 4;
+ brick 3 has now been replaced by brick 5.
+
+Rebalancing Volumes
+===================
+
+After expanding or shrinking a volume (using the add-brick and
+remove-brick commands respectively), you need to rebalance the data
+among the servers. New directories created after expanding or shrinking
+of the volume will be evenly distributed automatically. For all the
+existing directories, the distribution can be fixed by rebalancing the
+layout and/or data.
+
+This section describes how to rebalance GlusterFS volumes in your
+storage environment, using the following common scenarios:
+
+- Fix Layout - Fixes the layout changes so that the files can actually
+ go to newly added nodes. For more information, see Rebalancing Volume to Fix Layout Changes.
+
+- Fix Layout and Migrate Data - Rebalances volume by fixing the layout
+ changes and migrating the existing data. For more information, see
+ Rebalancing Volume to Fix Layout and Migrate Data.
+
+Rebalancing Volume to Fix Layout Changes
+----------------------------------------
+
+Fixing the layout is necessary because the layout structure is static
+for a given directory. In a scenario where new bricks have been added to
+the existing volume, newly created files in existing directories will
+still be distributed only among the old bricks. The
+`# gluster volume rebalance <VOLNAME> fix-layout start` command will fix the
+layout information so that the files can also go to newly added nodes.
+When this command is issued, all the file stat information which is
+already cached will get revalidated.
+
+A fix-layout rebalance will only fix the layout changes and does not
+migrate data. If you want to migrate the existing data,
+use the `# gluster volume rebalance <VOLNAME> start` command to rebalance data among
+the servers.
+
+**To rebalance a volume to fix layout changes**
+
+- Start the rebalance operation on any one of the servers using the
+ following command:
+
+ `# gluster volume rebalance <VOLNAME> fix-layout start`
+
+ For example:
+
+ # gluster volume rebalance test-volume fix-layout start
+ Starting rebalance on volume test-volume has been successful
+
+Rebalancing Volume to Fix Layout and Migrate Data
+-------------------------------------------------
+
+After expanding or shrinking a volume (using the add-brick and
+remove-brick commands respectively), you need to rebalance the data
+among the servers.
+
+**To rebalance a volume to fix layout and migrate the existing data**
+
+- Start the rebalance operation on any one of the servers using the
+ following command:
+
+ `# gluster volume rebalance <VOLNAME> start`
+
+ For example:
+
+ # gluster volume rebalance test-volume start
+ Starting rebalancing on volume test-volume has been successful
+
+- Start the migration operation forcefully on any one of the servers
+ using the following command:
+
+ `# gluster volume rebalance <VOLNAME> start force`
+
+ For example:
+
+ # gluster volume rebalance test-volume start force
+ Starting rebalancing on volume test-volume has been successful
+
+Displaying Status of Rebalance Operation
+----------------------------------------
+
+You can display the status information about rebalance volume operation,
+as needed.
+
+**To view status of rebalance volume**
+
+- Check the status of the rebalance operation, using the following
+ command:
+
+ `# gluster volume rebalance <VOLNAME> status`
+
+ For example:
+
+ # gluster volume rebalance test-volume status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 416 1463 312 in progress
+
+ The time to complete the rebalance operation depends on the number
+ of files on the volume along with the corresponding file sizes.
+ Continue checking the rebalance status, verifying that the number of
+ files rebalanced or total files scanned keeps increasing.
+
+ For example, running the status command again might display a result
+ similar to the following:
+
+ # gluster volume rebalance test-volume status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 498 1783 378 in progress
+
+ The rebalance status displays the following when the rebalance is
+ complete:
+
+ # gluster volume rebalance test-volume status
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed
+
+Stopping Rebalance Operation
+----------------------------
+
+You can stop the rebalance operation, as needed.
+
+**To stop rebalance**
+
+- Stop the rebalance operation using the following command:
+
+ `# gluster volume rebalance <VOLNAME> stop`
+
+ For example:
+
+ # gluster volume rebalance test-volume stop
+ Node Rebalanced-files size scanned status
+ --------- ---------------- ---- ------- -----------
+ 617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped
+ Stopped rebalance process on volume test-volume
+
+Stopping Volumes
+================
+
+To stop a volume
+
+1. Stop the volume using the following command:
+
+ `# gluster volume stop <VOLNAME>`
+
+ For example, to stop test-volume:
+
+ # gluster volume stop test-volume
+ Stopping volume will make its data inaccessible. Do you want to continue? (y/n)
+
+2. Enter `y` to confirm the operation. The output of the command
+ displays the following:
+
+ Stopping volume test-volume has been successful
+
+Deleting Volumes
+================
+
+To delete a volume
+
+1. Delete the volume using the following command:
+
+ `# gluster volume delete <VOLNAME>`
+
+ For example, to delete test-volume:
+
+ # gluster volume delete test-volume
+ Deleting volume will erase all information about the volume. Do you want to continue? (y/n)
+
+2. Enter `y` to confirm the operation. The command displays the
+ following:
+
+ Deleting volume test-volume has been successful
+
+Triggering Self-Heal on Replicate
+=================================
+
+In replicate module, previously you had to manually trigger a self-heal
+when a brick goes offline and comes back online, to bring all the
+replicas in sync. Now the pro-active self-heal daemon runs in the
+background, diagnoses issues and automatically initiates self-healing
+every 10 minutes on the files which require *healing*.
+
+You can view the list of files that need *healing*, the list of files
+which are currently/previously *healed*, list of files which are in
+split-brain state, and you can manually trigger self-heal on the entire
+volume or only on the files which need *healing*.
+
+- Trigger self-heal only on the files which require *healing*:
+
+ `# gluster volume heal <VOLNAME>`
+
+ For example, to trigger self-heal on files which require *healing*
+ of test-volume:
+
+ # gluster volume heal test-volume
+ Heal operation on volume test-volume has been successful
+
+- Trigger self-heal on all the files of a volume:
+
+ `# gluster volume heal <VOLNAME> full`
+
+ For example, to trigger self-heal on all the files of
+ test-volume:
+
+ # gluster volume heal test-volume full
+ Heal operation on volume test-volume has been successful
+
+- View the list of files that need *healing*:
+
+ `# gluster volume heal <VOLNAME> info`
+
+ For example, to view the list of files on test-volume that need
+ *healing*:
+
+ # gluster volume heal test-volume info
+ Brick :/gfs/test-volume_0
+ Number of entries: 0
+
+ Brick :/gfs/test-volume_1
+ Number of entries: 101
+ /95.txt
+ /32.txt
+ /66.txt
+ /35.txt
+ /18.txt
+ /26.txt
+ /47.txt
+ /55.txt
+ /85.txt
+ ...
+
+- View the list of files that are self-healed:
+
+ `# gluster volume heal <VOLNAME> info healed`
+
+ For example, to view the list of files on test-volume that are
+ self-healed:
+
+ # gluster volume heal test-volume info healed
+ Brick :/gfs/test-volume_0
+ Number of entries: 0
+
+ Brick :/gfs/test-volume_1
+ Number of entries: 69
+ /99.txt
+ /93.txt
+ /76.txt
+ /11.txt
+ /27.txt
+ /64.txt
+ /80.txt
+ /19.txt
+ /41.txt
+ /29.txt
+ /37.txt
+ /46.txt
+ ...
+
+- View the list of files of a particular volume on which the self-heal
+ failed:
+
+ `# gluster volume heal <VOLNAME> info failed`
+
+ For example, to view the list of files of test-volume that are not
+ self-healed:
+
+ # gluster volume heal test-volume info failed
+ Brick :/gfs/test-volume_0
+ Number of entries: 0
+
+ Brick server2:/gfs/test-volume_3
+ Number of entries: 72
+ /90.txt
+ /95.txt
+ /77.txt
+ /71.txt
+ /87.txt
+ /24.txt
+ ...
+
+- View the list of files of a particular volume which are in
+ split-brain state:
+
+ `# gluster volume heal <VOLNAME> info split-brain`
+
+ For example, to view the list of files of test-volume which are in
+ split-brain state:
+
+ # gluster volume heal test-volume info split-brain
+ Brick server1:/gfs/test-volume_2
+ Number of entries: 12
+ /83.txt
+ /28.txt
+ /69.txt
+ ...
+
+ Brick :/gfs/test-volume_2
+ Number of entries: 12
+ /83.txt
+ /28.txt
+ /69.txt
+ ...
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md
new file mode 100644
index 000000000..0312bd048
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md
@@ -0,0 +1,931 @@
+Monitoring your GlusterFS Workload
+==================================
+
+You can monitor the GlusterFS volumes on different parameters.
+Monitoring volumes helps in capacity planning and performance tuning
+tasks of the GlusterFS volume. Using this information, you can identify
+and troubleshoot issues.
+
+You can use Volume Top and Profile commands to view the performance and
+identify bottlenecks/hotspots of each brick of a volume. This helps
+system administrators to get vital performance information whenever
+performance needs to be probed.
+
+You can also perform statedump of the brick processes and nfs server
+process of a volume, and also view volume status and volume information.
+
+Running GlusterFS Volume Profile Command
+========================================
+
+GlusterFS Volume Profile command provides an interface to get the
+per-brick I/O information for each File Operation (FOP) of a volume. The
+per brick information helps in identifying bottlenecks in the storage
+system.
+
+This section describes how to run GlusterFS Volume Profile command by
+performing the following operations:
+
+- Start Profiling
+
+- Displaying the I/O Information
+
+- Stop Profiling
+
+Start Profiling
+---------------
+
+You must start the Profiling to view the File Operation information for
+each brick.
+
+**To start profiling:**
+
+- Start profiling using the following command:
+
+`# gluster volume profile VOLNAME start`
+
+For example, to start profiling on test-volume:
+
+ # gluster volume profile test-volume start
+ Profiling started on test-volume
+
+When profiling on the volume is started, the following additional
+options are displayed in the Volume Info:
+
+ diagnostics.count-fop-hits: on
+
+ diagnostics.latency-measurement: on
+
+Displaying the I/O Information
+------------------------------
+
+You can view the I/O information of each brick.
+
+To display I/O information:
+
+- Display the I/O information using the following command:
+
+`# gluster volume profile VOLNAME info`
+
+For example, to see the I/O information on test-volume:
+
+ # gluster volume profile test-volume info
+ Brick: Test:/export/2
+ Cumulative Stats:
+
+ Block 1b+ 32b+ 64b+
+ Size:
+ Read: 0 0 0
+ Write: 908 28 8
+
+ Block 128b+ 256b+ 512b+
+ Size:
+ Read: 0 6 4
+ Write: 5 23 16
+
+ Block 1024b+ 2048b+ 4096b+
+ Size:
+ Read: 0 52 17
+ Write: 15 120 846
+
+ Block 8192b+ 16384b+ 32768b+
+ Size:
+ Read: 52 8 34
+ Write: 234 134 286
+
+ Block 65536b+ 131072b+
+ Size:
+ Read: 118 622
+ Write: 1341 594
+
+
+ %-latency Avg- Min- Max- calls Fop
+ latency Latency Latency
+ ___________________________________________________________
+ 4.82 1132.28 21.00 800970.00 4575 WRITE
+ 5.70 156.47 9.00 665085.00 39163 READDIRP
+ 11.35 315.02 9.00 1433947.00 38698 LOOKUP
+ 11.88 1729.34 21.00 2569638.00 7382 FXATTROP
+ 47.35 104235.02 2485.00 7789367.00 488 FSYNC
+
+ ------------------
+
+ ------------------
+
+ Duration : 335
+
+ BytesRead : 94505058
+
+ BytesWritten : 195571980
+
+Stop Profiling
+--------------
+
+You can stop profiling the volume, if you do not need profiling
+information anymore.
+
+**To stop profiling**
+
+- Stop profiling using the following command:
+
+ `# gluster volume profile VOLNAME stop`
+
+ For example, to stop profiling on test-volume:
+
+ `# gluster volume profile test-volume stop`
+
+ `Profiling stopped on test-volume`
+
+Running GlusterFS Volume TOP Command
+====================================
+
+GlusterFS Volume Top command allows you to view the glusterfs bricks’
+performance metrics like read, write, file open calls, file read calls,
+file write calls, directory open calls, and directory read calls. The
+top command displays up to 100 results.
+
+This section describes how to run and view the results for the following
+GlusterFS Top commands:
+
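+- Viewing Open fd Count and Maximum fd Count
+
+- Viewing Highest File Read Calls
+
+- Viewing Highest File Write Calls
+
+- Viewing Highest Open Calls on Directories
+
+- Viewing Highest Read Calls on Directory
+
+- Viewing List of Read Performance on each Brick
+
+- Viewing List of Write Performance on each Brick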
+
+Viewing Open fd Count and Maximum fd Count
+------------------------------------------
+
+You can view both current open fd count (list of files that are
+currently the most opened and the count) on the brick and the maximum
+open fd count (count of files that are currently open and the count
+of maximum number of files opened at any given point of time, since the
+servers are up and running). If the brick name is not specified, then
+open fd metrics of all the bricks belonging to the volume will be
+displayed.
+
+**To view open fd count and maximum fd count:**
+
+- View open fd count and maximum fd count using the following command:
+
+ `# gluster volume top open [brick ] [list-cnt ]`
+
+ For example, to view open fd count and maximum fd count on brick
+ server:/export of test-volume and list top 10 open calls:
+
+ `# gluster volume top open brick list-cnt `
+
+ `Brick: server:/export/dir1 `
+
+ `Current open fd's: 34 Max open fd's: 209 `
+
+ ==========Open file stats========
+
+ open file name
+ call count
+
+ 2 /clients/client0/~dmtmp/PARADOX/
+ COURSES.DB
+
+ 11 /clients/client0/~dmtmp/PARADOX/
+ ENROLL.DB
+
+ 11 /clients/client0/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 10 /clients/client0/~dmtmp/PWRPNT/
+ TIPS.PPT
+
+ 10 /clients/client0/~dmtmp/PWRPNT/
+ PCBENCHM.PPT
+
+ 9 /clients/client7/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client1/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client2/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client0/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+ 9 /clients/client8/~dmtmp/PARADOX/
+ STUDENTS.DB
+
+Viewing Highest File Read Calls
+-------------------------------
+
+You can view highest read calls on each brick. If brick name is not
+specified, then by default, list of 100 files will be displayed.
+
+**To view highest file Read calls:**
+
+- View highest file Read calls using the following command:
+
+ `# gluster volume top read [brick ] [list-cnt ] `
+
+ For example, to view highest Read calls on brick server:/export of
+ test-volume:
+
+ `# gluster volume top read brick list-cnt `
+
+ `Brick:` server:/export/dir1
+
+ ==========Read file stats========
+
+ read filename
+ call count
+
+ 116 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 64 /clients/client0/~dmtmp/SEED/MEDIUM.FIL
+
+ 54 /clients/client2/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client6/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client5/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client3/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client4/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client9/~dmtmp/SEED/LARGE.FIL
+
+ 54 /clients/client8/~dmtmp/SEED/LARGE.FIL
+
+Viewing Highest File Write Calls
+--------------------------------
+
+You can view the list of files which have the highest file write calls on each
+brick. If brick name is not specified, then by default, list of 100
+files will be displayed.
+
+**To view highest file Write calls:**
+
+- View highest file Write calls using the following command:
+
+ `# gluster volume top write [brick ] [list-cnt ] `
+
+ For example, to view highest Write calls on brick server:/export of
+ test-volume:
+
+ `# gluster volume top write brick list-cnt `
+
+ `Brick: server:/export/dir1 `
+
+ ==========Write file stats========
+ write call count filename
+
+ 83 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client7/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client1/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client2/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client0/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client8/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client5/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client4/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client6/~dmtmp/SEED/LARGE.FIL
+
+ 59 /clients/client3/~dmtmp/SEED/LARGE.FIL
+
+Viewing Highest Open Calls on Directories
+-----------------------------------------
+
+You can view list of files which has highest open calls on directories
+of each brick. If brick name is not specified, then the metrics of all
+the bricks belonging to that volume will be displayed.
+
+To view list of open calls on each directory
+
+- View list of open calls on each directory using the following
+ command:
+
+ `# gluster volume top opendir [brick ] [list-cnt ] `
+
+ For example, to view open calls on brick server:/export/ of
+ test-volume:
+
+ `# gluster volume top opendir brick list-cnt `
+
+ `Brick: server:/export/dir1 `
+
+ ==========Directory open stats========
+
+ Opendir count directory name
+
+ 1001 /clients/client0/~dmtmp
+
+ 454 /clients/client8/~dmtmp
+
+ 454 /clients/client2/~dmtmp
+
+ 454 /clients/client6/~dmtmp
+
+ 454 /clients/client5/~dmtmp
+
+ 454 /clients/client9/~dmtmp
+
+ 443 /clients/client0/~dmtmp/PARADOX
+
+ 408 /clients/client1/~dmtmp
+
+ 408 /clients/client7/~dmtmp
+
+ 402 /clients/client4/~dmtmp
+
+Viewing Highest Read Calls on Directory
+---------------------------------------
+
+You can view list of files which has highest directory read calls on
+each brick. If brick name is not specified, then the metrics of all the
+bricks belonging to that volume will be displayed.
+
+**To view list of highest directory read calls on each brick**
+
+- View list of highest directory read calls on each brick using the
+ following command:
+
+ `# gluster volume top readdir [brick ] [list-cnt ] `
+
+ For example, to view highest directory read calls on brick
+ server:/export of test-volume:
+
+ `# gluster volume top readdir brick list-cnt `
+
+ `Brick: `
+
+ ==========Directory readdirp stats========
+
+ readdirp count directory name
+
+ 1996 /clients/client0/~dmtmp
+
+ 1083 /clients/client0/~dmtmp/PARADOX
+
+ 904 /clients/client8/~dmtmp
+
+ 904 /clients/client2/~dmtmp
+
+ 904 /clients/client6/~dmtmp
+
+ 904 /clients/client5/~dmtmp
+
+ 904 /clients/client9/~dmtmp
+
+ 812 /clients/client1/~dmtmp
+
+ 812 /clients/client7/~dmtmp
+
+ 800 /clients/client4/~dmtmp
+
+Viewing List of Read Performance on each Brick
+----------------------------------------------
+
+You can view the read throughput of files on each brick. If brick name
+is not specified, then the metrics of all the bricks belonging to that
+volume will be displayed. The output will be the read throughput.
+
+ ==========Read throughput file stats========
+
+ read filename Time
+ through
+ put(MBp
+ s)
+
+ 2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ TRIDOTS.POT 15:38:36.894610
+ 2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ PCBENCHM.PPT 15:38:39.815310
+ 2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:52:53.631499
+
+ 2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:38:36.926198
+
+ 2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:38:36.930445
+
+ 2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
+ COURSES.X04 15:38:40.549919
+
+ 2221.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
+ STUDENTS.VAL 15:52:53.298766
+
+ 2221.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
+ COURSES.DB 15:39:11.776780
+
+ 2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:10.251764
+
+ 2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
+ BASEMACH.DOC 15:39:09.336572
+
+This command will initiate a dd for the specified count and block size
+and measures the corresponding throughput.
+
+**To view list of read performance on each brick**
+
+- View list of read performance on each brick using the following
+ command:
+
+ `# gluster volume top read-perf [bs count ] [brick ] [list-cnt ]`
+
+ For example, to view read performance on brick server:/export/ of
+ test-volume, 256 block size of count 1, and list count 10:
+
+ `# gluster volume top read-perf bs 256 count 1 brick list-cnt `
+
+ `Brick: server:/export/dir1 256 bytes (256 B) copied, Throughput: 4.1 MB/s `
+
+ ==========Read throughput file stats========
+
+ read filename Time
+ through
+ put(MBp
+ s)
+
+ 2912.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ TRIDOTS.POT 15:38:36.896486
+
+ 2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
+ PCBENCHM.PPT 15:38:39.815310
+
+ 2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:52:53.631499
+
+ 2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:38:36.926198
+
+ 2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:38:36.930445
+
+ 2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
+ COURSES.X04 15:38:40.549919
+
+ 2221.00 /clients/client9/~dmtmp/PARADOX/ -2011-01-31
+ STUDENTS.VAL 15:52:53.298766
+
+ 2221.00 /clients/client8/~dmtmp/PARADOX/ -2011-01-31
+ COURSES.DB 15:39:11.776780
+
+ 2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:10.251764
+
+ 2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
+ BASEMACH.DOC 15:39:09.336572
+
+
+Viewing List of Write Performance on each Brick
+-----------------------------------------------
+
+You can view list of write throughput of files on each brick. If brick
+name is not specified, then the metrics of all the bricks belonging to
+that volume will be displayed. The output will be the write throughput.
+
+This command will initiate a dd for the specified count and block size
+and measures the corresponding throughput. To view list of write
+performance on each brick:
+
+- View list of write performance on each brick using the following
+ command:
+
+ `# gluster volume top write-perf [bs count ] [brick ] [list-cnt ] `
+
+ For example, to view write performance on brick server:/export/ of
+ test-volume, 256 block size of count 1, and list count 10:
+
+ `# gluster volume top write-perf bs 256 count 1 brick list-cnt `
+
+ `Brick`: server:/export/dir1
+
+ `256 bytes (256 B) copied, Throughput: 2.8 MB/s `
+
+ ==========Write throughput file stats========
+
+ write filename Time
+ throughput
+ (MBps)
+
+ 1170.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ SMALL.FIL 15:39:09.171494
+
+ 1008.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:39:09.73189
+
+ 949.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:38:36.927426
+
+ 936.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:38:36.933177
+ 897.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:09.33628
+
+ 897.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
+ MEDIUM.FIL 15:39:09.27713
+
+ 885.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
+ SMALL.FIL 15:38:36.924271
+
+ 528.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
+ LARGE.FIL 15:39:09.81893
+
+ 516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31
+ FASTENER.MDB 15:39:01.797317
+
+Displaying Volume Information
+=============================
+
+You can display information about a specific volume, or all volumes, as
+needed.
+
+**To display volume information**
+
+- Display information about a specific volume using the following
+ command:
+
+ `# gluster volume info VOLNAME`
+
+ For example, to display information about test-volume:
+
+ # gluster volume info test-volume
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Created
+ Number of Bricks: 4
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+- Display information about all volumes using the following command:
+
+ `# gluster volume info all`
+
+ # gluster volume info all
+
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Created
+ Number of Bricks: 4
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+ Volume Name: mirror
+ Type: Distributed-Replicate
+ Status: Started
+ Number of Bricks: 2 X 2 = 4
+ Bricks:
+ Brick1: server1:/brick1
+ Brick2: server2:/brick2
+ Brick3: server3:/brick3
+ Brick4: server4:/brick4
+
+ Volume Name: Vol
+ Type: Distribute
+ Status: Started
+ Number of Bricks: 1
+ Bricks:
+ Brick: server:/brick6
+
+Performing Statedump on a Volume
+================================
+
+Statedump is a mechanism through which you can get details of all
+internal variables and state of the glusterfs process at the time of
+issuing the command. You can perform statedumps of the brick processes
+and nfs server process of a volume using the statedump command. The
+following options can be used to determine what information is to be
+dumped:
+
+- **mem** - Dumps the memory usage and memory pool details of the
+ bricks.
+
+- **iobuf** - Dumps iobuf details of the bricks.
+
+- **priv** - Dumps private information of loaded translators.
+
+- **callpool** - Dumps the pending calls of the volume.
+
+- **fd** - Dumps the open fd tables of the volume.
+
+- **inode** - Dumps the inode tables of the volume.
+
+**To display volume statedump**
+
+- Display statedump of a volume or NFS server using the following
+ command:
+
+ `# gluster volume statedump VOLNAME [nfs] [all|mem|iobuf|callpool|priv|fd|inode]`
+
+ For example, to display statedump of test-volume:
+
+ # gluster volume statedump test-volume
+ Volume statedump successful
+
+ The statedump files are created on the brick servers in the `/tmp`
+ directory or in the directory set using `server.statedump-path`
+ volume option. The naming convention of the dump file is
+ `<brick-path>.<brick-pid>.dump`.
+
+- By default, the output of the statedump is stored at
+ ` /tmp/<brickname.PID.dump>` file on that particular server. Change
+ the directory of the statedump file using the following command:
+
+ `# gluster volume set VOLNAME server.statedump-path PATH`
+
+ For example, to change the location of the statedump file of
+ test-volume:
+
+ # gluster volume set test-volume server.statedump-path /usr/local/var/log/glusterfs/dumps/
+ Set volume successful
+
+ You can view the changed path of the statedump file using the
+ following command:
+
+ `# gluster volume info VOLNAME`
+
+Displaying Volume Status
+========================
+
+You can display the status information about a specific volume, brick or
+all volumes, as needed. Status information can be used to understand the
+current status of the brick, nfs processes, and overall file system.
+Status information can also be used to monitor and debug the volume
+information. You can view status of the volume along with the following
+details:
+
+- **detail** - Displays additional information about the bricks.
+
+- **clients** - Displays the list of clients connected to the volume.
+
+- **mem** - Displays the memory usage and memory pool details of the
+ bricks.
+
+- **inode** - Displays the inode tables of the volume.
+
+- **fd** - Displays the open fd (file descriptors) tables of the
+ volume.
+
+- **callpool** - Displays the pending calls of the volume.
+
+**To display volume status**
+
+- Display information about a specific volume using the following
+ command:
+
+ `# gluster volume status [all| []] [detail|clients|mem|inode|fd|callpool]`
+
+ For example, to display information about test-volume:
+
+ # gluster volume status test-volume
+ STATUS OF VOLUME: test-volume
+ BRICK PORT ONLINE PID
+ --------------------------------------------------------
+ arch:/export/1 24009 Y 22445
+ --------------------------------------------------------
+ arch:/export/2 24010 Y 22450
+
+- Display information about all volumes using the following command:
+
+ `# gluster volume status all`
+
+ # gluster volume status all
+ STATUS OF VOLUME: volume-test
+ BRICK PORT ONLINE PID
+ --------------------------------------------------------
+ arch:/export/4 24010 Y 22455
+
+ STATUS OF VOLUME: test-volume
+ BRICK PORT ONLINE PID
+ --------------------------------------------------------
+ arch:/export/1 24009 Y 22445
+ --------------------------------------------------------
+ arch:/export/2 24010 Y 22450
+
+- Display additional information about the bricks using the following
+ command:
+
+ `# gluster volume status detail`
+
+ For example, to display additional information about the bricks of
+ test-volume:
+
+ # gluster volume status test-volume detail
+ STATUS OF VOLUME: test-volume
+ -------------------------------------------
+ Brick : arch:/export/1
+ Port : 24009
+ Online : Y
+ Pid : 16977
+ File System : rootfs
+ Device : rootfs
+ Mount Options : rw
+ Disk Space Free : 13.8GB
+ Total Disk Space : 46.5GB
+ Inode Size : N/A
+ Inode Count : N/A
+ Free Inodes : N/A
+
+ Number of Bricks: 1
+ Bricks:
+ Brick: server:/brick6
+
+- Display the list of clients accessing the volumes using the
+ following command:
+
+ `# gluster volume status clients`
+
+ For example, to display the list of clients connected to
+ test-volume:
+
+ # gluster volume status test-volume clients
+ Brick : arch:/export/1
+ Clients connected : 2
+ Hostname Bytes Read BytesWritten
+ -------- --------- ------------
+ 127.0.0.1:1013 776 676
+ 127.0.0.1:1012 50440 51200
+
+- Display the memory usage and memory pool details of the bricks using
+ the following command:
+
+ `# gluster volume status mem`
+
+ For example, to display the memory usage and memory pool details of
+ the bricks of test-volume:
+
+ Memory status for volume : test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Mallinfo
+ --------
+ Arena : 434176
+ Ordblks : 2
+ Smblks : 0
+ Hblks : 12
+ Hblkhd : 40861696
+ Usmblks : 0
+ Fsmblks : 0
+ Uordblks : 332416
+ Fordblks : 101760
+ Keepcost : 100400
+
+ Mempool Stats
+ -------------
+ Name HotCount ColdCount PaddedSizeof AllocCount MaxAlloc
+ ---- -------- --------- ------------ ---------- --------
+ test-volume-server:fd_t 0 16384 92 57 5
+ test-volume-server:dentry_t 59 965 84 59 59
+ test-volume-server:inode_t 60 964 148 60 60
+ test-volume-server:rpcsvc_request_t 0 525 6372 351 2
+ glusterfs:struct saved_frame 0 4096 124 2 2
+ glusterfs:struct rpc_req 0 4096 2236 2 2
+ glusterfs:rpcsvc_request_t 1 524 6372 2 1
+ glusterfs:call_stub_t 0 1024 1220 288 1
+ glusterfs:call_stack_t 0 8192 2084 290 2
+ glusterfs:call_frame_t 0 16384 172 1728 6
+
+- Display the inode tables of the volume using the following command:
+
+ `# gluster volume status inode`
+
+ For example, to display the inode tables of the test-volume:
+
+ # gluster volume status test-volume inode
+ inode tables for volume test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Active inodes:
+ GFID Lookups Ref IA type
+ ---- ------- --- -------
+ 6f3fe173-e07a-4209-abb6-484091d75499 1 9 2
+ 370d35d7-657e-44dc-bac4-d6dd800ec3d3 1 1 2
+
+ LRU inodes:
+ GFID Lookups Ref IA type
+ ---- ------- --- -------
+ 80f98abe-cdcf-4c1d-b917-ae564cf55763 1 0 1
+ 3a58973d-d549-4ea6-9977-9aa218f233de 1 0 1
+ 2ce0197d-87a9-451b-9094-9baa38121155 1 0 2
+
+- Display the open fd tables of the volume using the following
+ command:
+
+ `# gluster volume status fd`
+
+ For example, to display the open fd tables of the test-volume:
+
+ # gluster volume status test-volume fd
+
+ FD tables for volume test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Connection 1:
+ RefCount = 0 MaxFDs = 128 FirstFree = 4
+ FD Entry PID RefCount Flags
+ -------- --- -------- -----
+ 0 26311 1 2
+ 1 26310 3 2
+ 2 26310 1 2
+ 3 26311 3 2
+
+ Connection 2:
+ RefCount = 0 MaxFDs = 128 FirstFree = 0
+ No open fds
+
+ Connection 3:
+ RefCount = 0 MaxFDs = 128 FirstFree = 0
+ No open fds
+
+- Display the pending calls of the volume using the following command:
+
+ `# gluster volume status callpool`
+
+ Each call has a call stack containing call frames.
+
+ For example, to display the pending calls of test-volume:
+
+ # gluster volume status test-volume callpool
+
+ Pending calls for volume test-volume
+ ----------------------------------------------
+ Brick : arch:/export/1
+ Pending calls: 2
+ Call Stack1
+ UID : 0
+ GID : 0
+ PID : 26338
+ Unique : 192138
+ Frames : 7
+ Frame 1
+ Ref Count = 1
+ Translator = test-volume-server
+ Completed = No
+ Frame 2
+ Ref Count = 0
+ Translator = test-volume-posix
+ Completed = No
+ Parent = test-volume-access-control
+ Wind From = default_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 3
+ Ref Count = 1
+ Translator = test-volume-access-control
+ Completed = No
+ Parent = repl-locks
+ Wind From = default_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 4
+ Ref Count = 1
+ Translator = test-volume-locks
+ Completed = No
+ Parent = test-volume-io-threads
+ Wind From = iot_fsync_wrapper
+ Wind To = FIRST_CHILD (this)->fops->fsync
+ Frame 5
+ Ref Count = 1
+ Translator = test-volume-io-threads
+ Completed = No
+ Parent = test-volume-marker
+ Wind From = default_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 6
+ Ref Count = 1
+ Translator = test-volume-marker
+ Completed = No
+ Parent = /export/1
+ Wind From = io_stats_fsync
+ Wind To = FIRST_CHILD(this)->fops->fsync
+ Frame 7
+ Ref Count = 1
+ Translator = /export/1
+ Completed = No
+ Parent = test-volume-server
+ Wind From = server_fsync_resume
+ Wind To = bound_xl->fops->fsync
+
+
diff --git a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md
new file mode 100644
index 000000000..4038523c8
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md
@@ -0,0 +1,419 @@
+Setting up GlusterFS Server Volumes
+===================================
+
+A volume is a logical collection of bricks where each brick is an export
+directory on a server in the trusted storage pool. Most of the gluster
+management operations are performed on the volume.
+
+To create a new volume in your storage environment, specify the bricks
+that comprise the volume. After you have created a new volume, you must
+start it before attempting to mount it.
+
+- Volumes of the following types can be created in your storage
+ environment:
+
+ - Distributed - Distributed volumes distributes files throughout
+ the bricks in the volume. You can use distributed volumes where
+ the requirement is to scale storage and the redundancy is either
+ not important or is provided by other hardware/software layers.
+ For more information, see ? .
+
+ - Replicated – Replicated volumes replicates files across bricks
+ in the volume. You can use replicated volumes in environments
+ where high-availability and high-reliability are critical. For
+ more information, see ?.
+
+ - Striped – Striped volumes stripes data across bricks in the
+ volume. For best results, you should use striped volumes only in
+ high concurrency environments accessing very large files. For
+ more information, see ?.
+
+ - Distributed Striped - Distributed striped volumes stripe data
+ across two or more nodes in the cluster. You should use
+ distributed striped volumes where the requirement is to scale
+ storage and in high concurrency environments accessing very
+ large files is critical. For more information, see ?.
+
+ - Distributed Replicated - Distributed replicated volumes
+ distributes files across replicated bricks in the volume. You
+ can use distributed replicated volumes in environments where the
+ requirement is to scale storage and high-reliability is
+ critical. Distributed replicated volumes also offer improved
+ read performance in most environments. For more information, see
+ ?.
+
+ - Distributed Striped Replicated – Distributed striped replicated
+ volumes distributes striped data across replicated bricks in the
+ cluster. For best results, you should use distributed striped
+ replicated volumes in highly concurrent environments where
+ parallel access of very large files and performance is critical.
+ In this release, configuration of this volume type is supported
+ only for Map Reduce workloads. For more information, see ?.
+
+ - Striped Replicated – Striped replicated volumes stripes data
+ across replicated bricks in the cluster. For best results, you
+ should use striped replicated volumes in highly concurrent
+ environments where there is parallel access of very large files
+ and performance is critical. In this release, configuration of
+ this volume type is supported only for Map Reduce workloads. For
+ more information, see ?.
+
+**To create a new volume**
+
+- Create a new volume :
+
+ `# gluster volume create [stripe | replica ] [transport tcp | rdma | tcp, rdma] `
+
+ For example, to create a volume called test-volume consisting of
+ server3:/exp3 and server4:/exp4:
+
+ # gluster volume create test-volume server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+Creating Distributed Volumes
+============================
+
+In a distributed volume, files are spread randomly across the bricks in
+the volume. Use distributed volumes where you need to scale storage and
+redundancy is either not important or is provided by other
+hardware/software layers.
+
+> **Note**
+>
+> Disk/server failure in distributed volumes can result in a serious
+> loss of data because directory contents are spread randomly across the
+> bricks in the volume.
+
+![][]
+
+**To create a distributed volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the distributed volume:
+
+ `# gluster volume create [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a distributed volume with four storage
+ servers using tcp:
+
+ # gluster volume create test-volume server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ (Optional) You can display the volume information:
+
+ # gluster volume info
+ Volume Name: test-volume
+ Type: Distribute
+ Status: Created
+ Number of Bricks: 4
+ Transport-type: tcp
+ Bricks:
+ Brick1: server1:/exp1
+ Brick2: server2:/exp2
+ Brick3: server3:/exp3
+ Brick4: server4:/exp4
+
+ For example, to create a distributed volume with four storage
+ servers over InfiniBand:
+
+ # gluster volume create test-volume transport rdma server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Replicated Volumes
+===========================
+
+Replicated volumes create copies of files across multiple bricks in the
+volume. You can use replicated volumes in environments where
+high-availability and high-reliability are critical.
+
+> **Note**
+>
+> The number of bricks should be equal to the replica count for a
+> replicated volume. To protect against server and disk failures, it is
+> recommended that the bricks of the volume are from different servers.
+
+![][1]
+
+**To create a replicated volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the replicated volume:
+
+ `# gluster volume create [replica ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a replicated volume with two storage servers:
+
+ # gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Striped Volumes
+========================
+
+Striped volumes stripes data across bricks in the volume. For best
+results, you should use striped volumes only in high concurrency
+environments accessing very large files.
+
+> **Note**
+>
+> The number of bricks should be equal to the stripe count for a
+> striped volume.
+
+![][2]
+
+**To create a striped volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the striped volume:
+
+ `# gluster volume create [stripe ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a striped volume across two storage servers:
+
+ # gluster volume create test-volume stripe 2 transport tcp server1:/exp1 server2:/exp2
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Distributed Striped Volumes
+====================================
+
+Distributed striped volumes stripe files across two or more nodes in
+the cluster. For best results, you should use distributed striped
+volumes where the requirement is to scale storage and where
+high-concurrency access to very large files is critical.
+
+> **Note**
+>
+> The number of bricks should be a multiple of the stripe count for a
+> distributed striped volume.
+
+![][3]
+
+**To create a distributed striped volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the distributed striped volume:
+
+ `# gluster volume create [stripe ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a distributed striped volume across eight
+ storage servers:
+
+ # gluster volume create test-volume stripe 4 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Distributed Replicated Volumes
+=======================================
+
+Distributes files across replicated bricks in the volume. You can use
+distributed replicated volumes in environments where the requirement is
+to scale storage and high-reliability is critical. Distributed
+replicated volumes also offer improved read performance in most
+environments.
+
+> **Note**
+>
+> The number of bricks should be a multiple of the replica count for a
+> distributed replicated volume. Also, the order in which bricks are
+> specified has a great effect on data protection. Each replica\_count
+> consecutive bricks in the list you give will form a replica set, with
+> all replica sets combined into a volume-wide distribute set. To make
+> sure that replica-set members are not placed on the same node, list
+> the first brick on every server, then the second brick on every server
+> in the same order, and so on.
+
+![][4]
+
+**To create a distributed replicated volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create the distributed replicated volume:
+
+ `# gluster volume create [replica ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, four node distributed (replicated) volume with a
+ two-way mirror:
+
+ # gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ For example, to create a six node distributed (replicated) volume
+ with a two-way mirror:
+
+ # gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Distributed Striped Replicated Volumes
+===============================================
+
+Distributed striped replicated volumes distributes striped data across
+replicated bricks in the cluster. For best results, you should use
+distributed striped replicated volumes in highly concurrent environments
+where parallel access of very large files and performance is critical.
+In this release, configuration of this volume type is supported only for
+Map Reduce workloads.
+
+> **Note**
+>
+> The number of bricks should be a multiple of the stripe count and
+> replica count for a distributed striped replicated volume.
+
+**To create a distributed striped replicated volume**
+
+1. Create a trusted storage pool as described earlier in ?.
+
+2. Create a distributed striped replicated volume using the following
+ command:
+
+ `# gluster volume create [stripe ] [replica ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a distributed replicated striped volume
+ across eight storage servers:
+
+ # gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Creating Striped Replicated Volumes
+===================================
+
+Striped replicated volumes stripes data across replicated bricks in the
+cluster. For best results, you should use striped replicated volumes in
+highly concurrent environments where there is parallel access of very
+large files and performance is critical. In this release, configuration
+of this volume type is supported only for Map Reduce workloads.
+
+> **Note**
+>
+> The number of bricks should be a multiple of the replicate count and
+> stripe count for a striped replicated volume.
+
+![][5]
+
+**To create a striped replicated volume**
+
+1. Create a trusted storage pool consisting of the storage servers that
+ will comprise the volume.
+
+ For more information, see ?.
+
+2. Create a striped replicated volume :
+
+ `# gluster volume create [stripe ] [replica ] [transport tcp | rdma | tcp,rdma] `
+
+ For example, to create a striped replicated volume across four
+ storage servers:
+
+ # gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ To create a striped replicated volume across six storage servers:
+
+ # gluster volume create test-volume stripe 3 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
+ Creation of test-volume has been successful
+ Please start the volume to access data.
+
+ If the transport type is not specified, *tcp* is used as the
+ default. You can also set additional options if required, such as
+ auth.allow or auth.reject. For more information, see ?
+
+ > **Note**
+ >
+ > Make sure you start your volumes before you try to mount them or
+ > else client operations after the mount will hang, see ? for
+ > details.
+
+Starting Volumes
+================
+
+You must start your volumes before you try to mount them.
+
+**To start a volume**
+
+- Start a volume:
+
+ `# gluster volume start VOLNAME`
+
+ For example, to start test-volume:
+
+ # gluster volume start test-volume
+ Starting test-volume has been successful
+
+ []: images/Distributed_Volume.png
+ [1]: images/Replicated_Volume.png
+ [2]: images/Striped_Volume.png
+ [3]: images/Distributed_Striped_Volume.png
+ [4]: images/Distributed_Replicated_Volume.png
+ [5]: images/Striped_Replicated_Volume.png
diff --git a/doc/admin-guide/en-US/markdown/admin_settingup_clients.md b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md
new file mode 100644
index 000000000..85b28c952
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md
@@ -0,0 +1,641 @@
+Accessing Data - Setting Up GlusterFS Client
+============================================
+
+You can access gluster volumes in multiple ways. You can use Gluster
+Native Client method for high concurrency, performance and transparent
+failover in GNU/Linux clients. You can also use NFS v3 to access gluster
+volumes. Extensive testing has been done on GNU/Linux clients and NFS
+implementations in other operating systems, such as FreeBSD and Mac OS X,
+as well as Windows 7 (Professional and Up) and Windows Server 2003.
+Other NFS client implementations may work with gluster NFS server.
+
+You can use CIFS to access volumes when using Microsoft Windows as well
+as SAMBA clients. For this access method, Samba packages need to be
+present on the client side.
+
+Gluster Native Client
+=====================
+
+The Gluster Native Client is a FUSE-based client running in user space.
+Gluster Native Client is the recommended method for accessing volumes
+when high concurrency and high write performance are required.
+
+This section introduces the Gluster Native Client and explains how to
+install the software on client machines. This section also describes how
+to mount volumes on clients (both manually and automatically) and how to
+verify that the volume has mounted successfully.
+
+Installing the Gluster Native Client
+------------------------------------
+
+Before you begin installing the Gluster Native Client, you need to
+verify that the FUSE module is loaded on the client and has access to
+the required modules as follows:
+
+1. Add the FUSE loadable kernel module (LKM) to the Linux kernel:
+
+ `# modprobe fuse`
+
+2. Verify that the FUSE module is loaded:
+
+ `# dmesg | grep -i fuse `
+
+ `fuse init (API version 7.13)`
+
+### Installing on Red Hat Package Manager (RPM) Distributions
+
+To install Gluster Native Client on RPM distribution-based systems
+
+1. Install required prerequisites on the client using the following
+ command:
+
+ `$ sudo yum -y install openssh-server wget fuse fuse-libs openib libibverbs`
+
+2. Ensure that TCP and UDP ports 24007 and 24008 are open on all
+ Gluster servers. Apart from these ports, you need to open one port
+ for each brick starting from port 24009. For example: if you have
+ five bricks, you need to have ports 24009 to 24013 open.
+
+ You can use the following chains with iptables:
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT `
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT`
+
+ > **Note**
+ >
+ > If you already have iptable chains, make sure that the above
+ > ACCEPT rules precede the DROP rules. This can be achieved by
+ > providing a lower rule number than the DROP rule.
+
+3. Download the latest glusterfs, glusterfs-fuse, and glusterfs-rdma
+ RPM files to each client. The glusterfs package contains the Gluster
+ Native Client. The glusterfs-fuse package contains the FUSE
+ translator required for mounting on client systems and the
+ glusterfs-rdma packages contain OpenFabrics verbs RDMA module for
+ Infiniband.
+
+ You can download the software at <http://bits.gluster.com/gluster/glusterfs/3.3.0qa30/x86_64/>.
+
+4. Install Gluster Native Client on the client.
+
+ `$ sudo rpm -i glusterfs-3.3.0qa30-1.x86_64.rpm `
+
+ `$ sudo rpm -i glusterfs-fuse-3.3.0qa30-1.x86_64.rpm `
+
+ `$ sudo rpm -i glusterfs-rdma-3.3.0qa30-1.x86_64.rpm`
+
+ > **Note**
+ >
+ > The RDMA module is only required when using Infiniband.
+
+### Installing on Debian-based Distributions
+
+To install Gluster Native Client on Debian-based distributions
+
+1. Install OpenSSH Server on each client using the following command:
+
+ `$ sudo apt-get install openssh-server vim wget`
+
+2. Download the latest GlusterFS .deb file and checksum to each client.
+
+ You can download the software at <http://www.gluster.org/download/>.
+
+3. For each .deb file, get the checksum (using the following command)
+ and compare it against the checksum for that file in the md5sum
+ file.
+
+ `$ md5sum GlusterFS_DEB_file.deb `
+
+ The md5sum of the packages is available at: <http://download.gluster.com/pub/gluster/glusterfs>
+
+4. Uninstall GlusterFS v3.1 (or an earlier version) from the client
+ using the following command:
+
+ `$ sudo dpkg -r glusterfs `
+
+ (Optional) Run `$ sudo dpkg --purge glusterfs` to purge the
+ configuration files.
+
+5. Install Gluster Native Client on the client using the following
+ command:
+
+ `$ sudo dpkg -i GlusterFS_DEB_file `
+
+ For example:
+
+ `$ sudo dpkg -i glusterfs-3.3.x.deb `
+
+6. Ensure that TCP and UDP ports 24007 and 24008 are open on all
+ Gluster servers. Apart from these ports, you need to open one port
+ for each brick starting from port 24009. For example: if you have
+ five bricks, you need to have ports 24009 to 24013 open.
+
+ You can use the following chains with iptables:
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT `
+
+ `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT`
+
+ > **Note**
+ >
+ > If you already have iptable chains, make sure that the above
+ > ACCEPT rules precede the DROP rules. This can be achieved by
+ > providing a lower rule number than the DROP rule.
+
+### Performing a Source Installation
+
+To build and install Gluster Native Client from the source code
+
+1. Create a new directory using the following commands:
+
+ `# mkdir glusterfs `
+
+ `# cd glusterfs`
+
+2. Download the source code.
+
+ You can download the source at <http://www.gluster.org/download/>.
+
+3. Extract the source code using the following command:
+
+ `# tar -xvzf SOURCE-FILE `
+
+4. Run the configuration utility using the following command:
+
+ `# ./configure `
+
+ `GlusterFS configure summary `
+
+ `================== `
+
+ `FUSE client : yes `
+
+ `Infiniband verbs : yes `
+
+ `epoll IO multiplex : yes `
+
+ `argp-standalone : no `
+
+ `fusermount : no `
+
+ `readline : yes`
+
+ The configuration summary shows the components that will be built
+ with Gluster Native Client.
+
+5. Build the Gluster Native Client software using the following
+ commands:
+
+ `# make `
+
+ `# make install`
+
+6. Verify that the correct version of Gluster Native Client is
+ installed, using the following command:
+
+ `# glusterfs --version`
+
+Mounting Volumes
+----------------
+
+After installing the Gluster Native Client, you need to mount Gluster
+volumes to access data. There are two methods you can choose:
+
+- ?
+
+- ?
+
+After mounting a volume, you can test the mounted volume using the
+procedure described in ?.
+
+> **Note**
+>
+> Server names selected during creation of Volumes should be resolvable
+> in the client machine. You can use appropriate /etc/hosts entries or
+> DNS server to resolve server names to IP addresses.
+
+### Manually Mounting Volumes
+
+To manually mount a Gluster volume
+
+- To mount a volume, use the following command:
+
+ `# mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR`
+
+ For example:
+
+ `# mount -t glusterfs server1:/test-volume /mnt/glusterfs`
+
+ > **Note**
+ >
+ > The server specified in the mount command is only used to fetch
+ > the gluster configuration volfile describing the volume name.
+ > Subsequently, the client will communicate directly with the
+ > servers mentioned in the volfile (which might not even include the
+ > one used for mount).
+ >
+ > If you see a usage message like "Usage: mount.glusterfs", mount
+ > usually requires you to create a directory to be used as the mount
+ > point. Run "mkdir /mnt/glusterfs" before you attempt to run the
+ > mount command listed above.
+
+**Mounting Options**
+
+You can specify the following options when using the
+`mount -t glusterfs` command. Note that you need to separate all options
+with commas.
+
+backupvolfile-server=server-name
+
+volfile-max-fetch-attempts=number of attempts
+
+log-level=loglevel
+
+log-file=logfile
+
+transport=transport-type
+
+direct-io-mode=[enable|disable]
+
+For example:
+
+`# mount -t glusterfs -o backupvolfile-server=volfile_server2,volfile-max-fetch-attempts=2,log-level=WARNING,log-file=/var/log/gluster.log server1:/test-volume /mnt/glusterfs`
+
+If `backupvolfile-server` option is added while mounting fuse client,
+when the first volfile server fails, then the server specified in
+`backupvolfile-server` option is used as volfile server to mount the
+client.
+
+In the `volfile-max-fetch-attempts=X` option, specify the number of
+attempts to fetch volume files while mounting a volume. This option is
+useful when you mount a server with multiple IP addresses or when
+round-robin DNS is configured for the server name.
+
+### Automatically Mounting Volumes
+
+You can configure your system to automatically mount the Gluster volume
+each time your system starts.
+
+The server specified in the mount command is only used to fetch the
+gluster configuration volfile describing the volume name. Subsequently,
+the client will communicate directly with the servers mentioned in the
+volfile (which might not even include the one used for mount).
+
+**To automatically mount a Gluster volume**
+
+- To mount a volume, edit the /etc/fstab file and add the following
+ line:
+
+ `HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev 0 0 `
+
+ For example:
+
+ `server1:/test-volume /mnt/glusterfs glusterfs defaults,_netdev 0 0`
+
+**Mounting Options**
+
+You can specify the following options when updating the /etc/fstab file.
+Note that you need to separate all options with commas.
+
+log-level=loglevel
+
+log-file=logfile
+
+transport=transport-type
+
+direct-io-mode=[enable|disable]
+
+For example:
+
+`HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev,log-level=WARNING,log-file=/var/log/gluster.log 0 0 `
+
+### Testing Mounted Volumes
+
+To test mounted volumes
+
+- Use the following command:
+
+ `# mount `
+
+ If the gluster volume was successfully mounted, the output of the
+ mount command on the client will be similar to this example:
+
+ `server1:/test-volume on /mnt/glusterfs type fuse.glusterfs (rw,allow_other,default_permissions,max_read=131072)`
+
+- Use the following command:
+
+ `# df`
+
+ The output of df command on the client will display the aggregated
+ storage space from all the bricks in a volume similar to this
+ example:
+
+ `# df -h /mnt/glusterfs Filesystem Size Used Avail Use% Mounted on server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs`
+
+- Change to the directory and list the contents by entering the
+ following:
+
+ `# cd MOUNTDIR `
+
+ `# ls`
+
+- For example,
+
+ `# cd /mnt/glusterfs `
+
+ `# ls`
+
+NFS
+===
+
+You can use NFS v3 to access gluster volumes. Extensive testing has
+been done on GNU/Linux clients. NFS implementations in other operating
+systems, such as FreeBSD and Mac OS X, as well as Windows 7
+(Professional and Up), Windows Server 2003, and others, may work with
+the gluster NFS server implementation.
+
+GlusterFS now includes network lock manager (NLM) v4. NLM enables
+applications on NFSv3 clients to do record locking on files on NFS
+server. It is started automatically whenever the NFS server is run.
+
+You must install the nfs-common package on both servers and clients
+(only for Debian-based distributions).
+
+This section describes how to use NFS to mount Gluster volumes (both
+manually and automatically) and how to verify that the volume has been
+mounted successfully.
+
+Using NFS to Mount Volumes
+--------------------------
+
+You can use either of the following methods to mount Gluster volumes:
+
+- ?
+
+- ?
+
+**Prerequisite**: Install nfs-common package on both servers and clients
+(only for Debian-based distribution), using the following command:
+
+`$ sudo aptitude install nfs-common `
+
+After mounting a volume, you can test the mounted volume using the
+procedure described in ?.
+
+### Manually Mounting Volumes Using NFS
+
+To manually mount a Gluster volume using NFS
+
+- To mount a volume, use the following command:
+
+ `# mount -t nfs -o vers=3 HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR`
+
+ For example:
+
+ `# mount -t nfs -o vers=3 server1:/test-volume /mnt/glusterfs`
+
+ > **Note**
+ >
+ > Gluster NFS server does not support UDP. If the NFS client you are
+ > using defaults to connecting using UDP, the following message
+ > appears:
+ >
+ > `requested NFS version or transport protocol is not supported`.
+
+ **To connect using TCP**
+
+- Add the following option to the mount command:
+
+ `-o mountproto=tcp `
+
+ For example:
+
+ `# mount -o mountproto=tcp -t nfs server1:/test-volume /mnt/glusterfs`
+
+**To mount Gluster NFS server from a Solaris client**
+
+- Use the following command:
+
+ `# mount -o proto=tcp,vers=3 nfs://HOSTNAME-OR-IPADDRESS:38467/VOLNAME MOUNTDIR`
+
+ For example:
+
+ ` # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs`
+
+### Automatically Mounting Volumes Using NFS
+
+You can configure your system to automatically mount Gluster volumes
+using NFS each time the system starts.
+
+**To automatically mount a Gluster volume using NFS**
+
+- To mount a volume, edit the /etc/fstab file and add the following
+ line:
+
+ `HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,vers=3 0 0`
+
+ For example,
+
+ `server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,vers=3 0 0`
+
+ > **Note**
+ >
+ > Gluster NFS server does not support UDP. If the NFS client you are
+ > using defaults to connecting using UDP, the following message
+ > appears:
+ >
+ > `requested NFS version or transport protocol is not supported.`
+
+ To connect using TCP
+
+- Add the following entry in /etc/fstab file :
+
+ `HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,mountproto=tcp 0 0`
+
+ For example,
+
+ `server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,mountproto=tcp 0 0`
+
+**To automount NFS mounts**
+
+Gluster supports \*nix standard method of automounting NFS mounts.
+Update the /etc/auto.master and /etc/auto.misc and restart the autofs
+service. After that, whenever a user or process attempts to access the
+directory it will be mounted in the background.
+
+### Testing Volumes Mounted Using NFS
+
+You can confirm that Gluster directories are mounting successfully.
+
+**To test mounted volumes**
+
+- Use the mount command by entering the following:
+
+ `# mount`
+
+ For example, the output of the mount command on the client will
+ display an entry like the following:
+
+ `server1:/test-volume on /mnt/glusterfs type nfs (rw,vers=3,addr=server1)`
+
+- Use the df command by entering the following:
+
+ `# df`
+
+ For example, the output of df command on the client will display the
+ aggregated storage space from all the bricks in a volume.
+
+ # df -h /mnt/glusterfs
+ Filesystem Size Used Avail Use% Mounted on
+ server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs
+
+- Change to the directory and list the contents by entering the
+ following:
+
+ `# cd MOUNTDIR`
+
+ `# ls`
+
+ For example,
+
+ `# cd /mnt/glusterfs`
+
+ `# ls`
+
+CIFS
+====
+
+You can use CIFS to access volumes when using Microsoft Windows as
+well as SAMBA clients. For this access method, Samba packages need to be
+present on the client side. You can export glusterfs mount point as the
+samba export, and then mount it using CIFS protocol.
+
+This section describes how to mount CIFS shares on Microsoft
+Windows-based clients (both manually and automatically) and how to
+verify that the volume has mounted successfully.
+
+> **Note**
+>
+> CIFS access using the Mac OS X Finder is not supported, however, you
+> can use the Mac OS X command line to access Gluster volumes using
+> CIFS.
+
+Using CIFS to Mount Volumes
+---------------------------
+
+You can use either of the following methods to mount Gluster volumes:
+
+- ?
+
+- ?
+
+After mounting a volume, you can test the mounted volume using the
+procedure described in ?.
+
+You can also use Samba for exporting Gluster Volumes through CIFS
+protocol.
+
+### Exporting Gluster Volumes Through Samba
+
+We recommend you to use Samba for exporting Gluster volumes through the
+CIFS protocol.
+
+**To export volumes through CIFS protocol**
+
+1. Mount a Gluster volume. For more information on mounting volumes,
+ see ?.
+
+2. Setup Samba configuration to export the mount point of the Gluster
+ volume.
+
+ For example, if a Gluster volume is mounted on /mnt/glusterfs, you
+ must edit smb.conf file to enable exporting this through CIFS. Open
+ smb.conf file in an editor and add the following lines for a simple
+ configuration:
+
+ [glustertest]
+
+ comment = For testing a Gluster volume exported through CIFS
+
+ path = /mnt/glusterfs
+
+ read only = no
+
+ guest ok = yes
+
+Save the changes and start the smb service using your system's init
+scripts (/etc/init.d/smb [re]start).
+
+> **Note**
+>
+> To be able to mount from any server in the trusted storage pool, you must
+> repeat these steps on each Gluster node. For more advanced
+> configurations, see Samba documentation.
+
+### Manually Mounting Volumes Using CIFS
+
+You can manually mount Gluster volumes using CIFS on Microsoft
+Windows-based client machines.
+
+**To manually mount a Gluster volume using CIFS**
+
+1. Using Windows Explorer, choose **Tools \> Map Network Drive…** from
+ the menu. The **Map Network Drive** window appears.
+
+2. Choose the drive letter using the **Drive** drop-down list.
+
+3. Click **Browse**, select the volume to map to the network drive, and
+ click **OK**.
+
+4. Click **Finish.**
+
+The network drive (mapped to the volume) appears in the Computer window.
+
+**Alternatively, to manually mount a Gluster volume using CIFS.**
+
+- Click **Start \> Run** and enter the following:
+
+ `\\SERVERNAME\VOLNAME`
+
+ For example:
+
+ `\\server1\test-volume`
+
+### Automatically Mounting Volumes Using CIFS
+
+You can configure your system to automatically mount Gluster volumes
+using CIFS on Microsoft Windows-based clients each time the system
+starts.
+
+**To automatically mount a Gluster volume using CIFS**
+
+The network drive (mapped to the volume) appears in the Computer window
+and is reconnected each time the system starts.
+
+1. Using Windows Explorer, choose **Tools \> Map Network Drive…** from
+ the menu. The **Map Network Drive** window appears.
+
+2. Choose the drive letter using the **Drive** drop-down list.
+
+3. Click **Browse**, select the volume to map to the network drive, and
+ click **OK**.
+
+4. Click the **Reconnect at logon** checkbox.
+
+5. Click **Finish.**
+
+### Testing Volumes Mounted Using CIFS
+
+You can confirm that Gluster directories are mounting successfully by
+navigating to the directory using Windows Explorer.
+
+ []: http://bits.gluster.com/gluster/glusterfs/3.3.0qa30/x86_64/
+ [1]: http://www.gluster.org/download/
+ [2]: http://download.gluster.com/pub/gluster/glusterfs
diff --git a/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md
new file mode 100644
index 000000000..43251cd01
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md
@@ -0,0 +1,70 @@
+Managing the glusterd Service
+=============================
+
+After installing GlusterFS, you must start the glusterd service. The
+glusterd service serves as the Gluster elastic volume manager,
+overseeing glusterfs processes, and co-ordinating dynamic volume
+operations, such as adding and removing volumes across multiple storage
+servers non-disruptively.
+
+This section describes how to start the glusterd service in the
+following ways:
+
+- Starting and Stopping glusterd Manually
+
+- Starting glusterd Automatically
+
+> **Note**
+>
+> You must start glusterd on all GlusterFS servers.
+
+Starting and Stopping glusterd Manually
+=======================================
+
+This section describes how to start and stop glusterd manually.
+
+- To start glusterd manually, enter the following command:
+
+ `# /etc/init.d/glusterd start `
+
+- To stop glusterd manually, enter the following command:
+
+ `# /etc/init.d/glusterd stop`
+
+Starting glusterd Automatically
+===============================
+
+This section describes how to configure the system to automatically
+start the glusterd service every time the system boots.
+
+To automatically start the glusterd service every time the system boots,
+enter the following from the command line:
+
+`# chkconfig glusterd on `
+
+Red Hat-based Systems
+---------------------
+
+To configure Red Hat-based systems to automatically start the glusterd
+service every time the system boots, enter the following from the
+command line:
+
+`# chkconfig glusterd on `
+
+Debian-based Systems
+--------------------
+
+To configure Debian-based systems to automatically start the glusterd
+service every time the system boots, enter the following from the
+command line:
+
+`# update-rc.d glusterd defaults`
+
+Systems Other than Red Hat and Debian
+-------------------------------------
+
+To configure systems other than Red Hat or Debian to automatically start
+the glusterd service every time the system boots, enter the following
+entry to the */etc/rc.local* file:
+
+`# echo "glusterd" >> /etc/rc.local `
diff --git a/doc/admin-guide/en-US/markdown/admin_storage_pools.md b/doc/admin-guide/en-US/markdown/admin_storage_pools.md
new file mode 100644
index 000000000..2a35cbea5
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_storage_pools.md
@@ -0,0 +1,73 @@
+Setting up Trusted Storage Pools
+================================
+
+Before you can configure a GlusterFS volume, you must create a trusted
+storage pool consisting of the storage servers that provide bricks to a
+volume.
+
+A storage pool is a trusted network of storage servers. When you start
+the first server, the storage pool consists of that server alone. To add
+additional storage servers to the storage pool, you can use the probe
+command from a storage server that is already trusted.
+
+> **Note**
+>
+> Do not self-probe the first server/localhost.
+
+The GlusterFS service must be running on all storage servers that you
+want to add to the storage pool. See *Managing the glusterd Service*.
+
+Adding Servers to Trusted Storage Pool
+======================================
+
+To create a trusted storage pool, add servers to the trusted storage
+pool
+
+1. The hostnames used to create the storage pool must be resolvable by
+ DNS.
+
+ To add a server to the storage pool:
+
+ `# gluster peer probe `
+
+ For example, to create a trusted storage pool of four servers, add
+ three servers to the storage pool from server1:
+
+ # gluster peer probe server2
+ Probe successful
+
+ # gluster peer probe server3
+ Probe successful
+
+ # gluster peer probe server4
+ Probe successful
+
+2. Verify the peer status from the first server using the following
+ commands:
+
+ # gluster peer status
+ Number of Peers: 3
+
+ Hostname: server2
+ Uuid: 5e987bda-16dd-43c2-835b-08b7d55e94e5
+ State: Peer in Cluster (Connected)
+
+ Hostname: server3
+ Uuid: 1e0ca3aa-9ef7-4f66-8f15-cbc348f29ff7
+ State: Peer in Cluster (Connected)
+
+ Hostname: server4
+ Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7
+ State: Peer in Cluster (Connected)
+
+Removing Servers from the Trusted Storage Pool
+==============================================
+
+To remove a server from the storage pool:
+
+`# gluster peer detach`
+
+For example, to remove server4 from the trusted storage pool:
+
+ # gluster peer detach server4
+ Detach successful
diff --git a/doc/admin-guide/en-US/markdown/admin_troubleshooting.md b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md
new file mode 100644
index 000000000..88fb85c24
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md
@@ -0,0 +1,543 @@
+Troubleshooting GlusterFS
+=========================
+
+This section describes how to manage GlusterFS logs and most common
+troubleshooting scenarios related to GlusterFS.
+
+Managing GlusterFS Logs
+=======================
+
+This section describes how to manage GlusterFS logs by performing the
+following operation:
+
+- Rotating Logs
+
+Rotating Logs
+-------------
+
+Administrators can rotate the log file in a volume, as needed.
+
+**To rotate a log file**
+
+- Rotate the log file using the following command:
+
+ `# gluster volume log rotate `
+
+ For example, to rotate the log file on test-volume:
+
+ # gluster volume log rotate test-volume
+ log rotate successful
+
+ > **Note**
+ >
+ > When a log file is rotated, the contents of the current log file
+ > are moved to log-file-name.epoch-time-stamp.
+
+Troubleshooting Geo-replication
+===============================
+
+This section describes the most common troubleshooting scenarios related
+to GlusterFS Geo-replication.
+
+Locating Log Files
+------------------
+
+For every Geo-replication session, the following three log files are
+associated to it (four, if the slave is a gluster volume):
+
+- Master-log-file - log file for the process which monitors the Master
+ volume
+
+- Slave-log-file - log file for process which initiates the changes in
+ slave
+
+- Master-gluster-log-file - log file for the maintenance mount point
+ that Geo-replication module uses to monitor the master volume
+
+- Slave-gluster-log-file - is the slave's counterpart of it
+
+**Master Log File**
+
+To get the Master-log-file for geo-replication, use the following
+command:
+
+`gluster volume geo-replication config log-file`
+
+For example:
+
+`# gluster volume geo-replication Volume1 example.com:/data/remote_dir config log-file `
+
+**Slave Log File**
+
+To get the log file for Geo-replication on slave (glusterd must be
+running on slave machine), use the following commands:
+
+1. On master, run the following command:
+
+ `# gluster volume geo-replication Volume1 example.com:/data/remote_dir config session-owner 5f6e5200-756f-11e0-a1f0-0800200c9a66 `
+
+ Displays the session owner details.
+
+2. On slave, run the following command:
+
+ `# gluster volume geo-replication /data/remote_dir config log-file /var/log/gluster/${session-owner}:remote-mirror.log `
+
+3. Substitute the session owner details (output of Step 1) into the
+ output of Step 2 to get the location of the log file.
+
+ `/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log`
+
+Rotating Geo-replication Logs
+-----------------------------
+
+Administrators can rotate the log file of a particular master-slave
+session, as needed. When you run geo-replication's ` log-rotate`
+command, the log file is backed up with the current timestamp suffixed
+to the file name and a signal is sent to gsyncd to start logging to a new
+log file.
+
+**To rotate a geo-replication log file**
+
+- Rotate log file for a particular master-slave session using the
+ following command:
+
+ `# gluster volume geo-replication log-rotate`
+
+ For example, to rotate the log file of master `Volume1` and slave
+ `example.com:/data/remote_dir` :
+
+ # gluster volume geo-replication Volume1 example.com:/data/remote_dir log rotate
+ log rotate successful
+
+- Rotate log file for all sessions for a master volume using the
+ following command:
+
+ `# gluster volume geo-replication log-rotate`
+
+ For example, to rotate the log file of master `Volume1`:
+
+ # gluster volume geo-replication Volume1 log rotate
+ log rotate successful
+
+- Rotate log file for all sessions using the following command:
+
+ `# gluster volume geo-replication log-rotate`
+
+ For example, to rotate the log file for all sessions:
+
+ # gluster volume geo-replication log rotate
+ log rotate successful
+
+Synchronization is not complete
+-------------------------------
+
+**Description**: GlusterFS Geo-replication did not synchronize the data
+completely but still the geo-replication status displayed is OK.
+
+**Solution**: You can enforce a full sync of the data by erasing the
+index and restarting GlusterFS Geo-replication. After restarting,
+GlusterFS Geo-replication begins synchronizing all the data. All files
+are compared using checksum, which can be a lengthy and high resource
+utilization operation on large data sets. If the error situation
+persists, contact Red Hat Support.
+
+For more information about erasing index, see ?.
+
+Issues in Data Synchronization
+------------------------------
+
+**Description**: Geo-replication displays status as OK, but the files do
+not get synced; only directories and symlinks get synced, with the
+following error message in the log:
+
+[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to
+sync ./some\_file\`
+
+**Solution**: Geo-replication invokes rsync v3.0.0 or higher on the host
+and the remote machine. You must verify if you have installed the
+required version.
+
+Geo-replication status displays Faulty very often
+-------------------------------------------------
+
+**Description**: Geo-replication displays status as faulty very often
+with a backtrace similar to the following:
+
+[2011-04-28 14:06:18.378859] E [syncdutils:131:log\_raise\_exception]
+\<top\>: FAIL: Traceback (most recent call last): File
+"/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line
+152, in twraptf(\*aa) File
+"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in
+listen rid, exc, res = recv(self.inf) File
+"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 42, in
+recv return pickle.load(inf) EOFError
+
+**Solution**: This error indicates that the RPC communication between
+the master gsyncd module and slave gsyncd module is broken and this can
+happen for various reasons. Check if it satisfies all the following
+pre-requisites:
+
+- Password-less SSH is set up properly between the host and the remote
+ machine.
+
+- FUSE is installed on the machine, because the geo-replication module
+ mounts the GlusterFS volume using FUSE to sync data.
+
+- If the **Slave** is a volume, check if that volume is started.
+
+- If the Slave is a plain directory, verify if the directory has been
+ created already with the required permissions.
+
+- If GlusterFS 3.2 or higher is not installed in the default location
+ (in Master) and has been prefixed to be installed in a custom
+ location, configure the `gluster-command` for it to point to the
+ exact location.
+
+- If GlusterFS 3.2 or higher is not installed in the default location
+ (in slave) and has been prefixed to be installed in a custom
+ location, configure the `remote-gsyncd-command` for it to point to
+ the exact place where gsyncd is located.
+
+Intermediate Master goes to Faulty State
+----------------------------------------
+
+**Description**: In a cascading set-up, the intermediate master goes to
+faulty state with the following log:
+
+raise RuntimeError ("aborting on uuid change from %s to %s" % \\
+RuntimeError: aborting on uuid change from
+af07e07c-427f-4586-ab9f-4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154
+
+**Solution**: In a cascading set-up the Intermediate master is loyal to
+the original primary master. The above log means that the
+geo-replication module has detected change in primary master. If this is
+the desired behavior, delete the config option volume-id in the session
+initiated from the intermediate master.
+
+Troubleshooting POSIX ACLs
+==========================
+
+This section describes the most common troubleshooting issues related to
+POSIX ACLs.
+
+setfacl command fails with “setfacl: \<file or directory name\>: Operation not supported” error
+-----------------------------------------------------------------------------------------------
+
+You may face this error when the backend file systems in one of the
+servers is not mounted with the "-o acl" option. The same can be
+confirmed by viewing the following error message in the log file of the
+server "Posix access control list is not supported".
+
+**Solution**: Remount the backend file system with "-o acl" option. For
+more information, see ?.
+
+Troubleshooting Hadoop Compatible Storage
+=========================================
+
+This section describes the most common troubleshooting issues related to
+Hadoop Compatible Storage.
+
+Time Sync
+---------
+
+Running MapReduce job may throw exceptions if the time is out-of-sync on
+the hosts in the cluster.
+
+**Solution**: Sync the time on all hosts using ntpd program.
+
+Troubleshooting NFS
+===================
+
+This section describes the most common troubleshooting issues related to
+NFS.
+
+mount command on NFS client fails with “RPC Error: Program not registered”
+--------------------------------------------------------------------------
+
+Start portmap or rpcbind service on the NFS server.
+
+This error is encountered when the server has not started correctly.
+
+On most Linux distributions this is fixed by starting portmap:
+
+`$ /etc/init.d/portmap start`
+
+On some distributions where portmap has been replaced by rpcbind, the
+following command is required:
+
+`$ /etc/init.d/rpcbind start `
+
+After starting portmap or rpcbind, gluster NFS server needs to be
+restarted.
+
+NFS server start-up fails with “Port is already in use” error in the log file.
+-------------------------------------------------------------------------------
+
+Another Gluster NFS server is running on the same machine.
+
+This error can arise in case there is already a Gluster NFS server
+running on the same machine. This situation can be confirmed from the
+log file, if the following error lines exist:
+
+ [2010-05-26 23:40:49] E [rpc-socket.c:126:rpcsvc_socket_listen] rpc-socket: binding socket failed:Address already in use
+ [2010-05-26 23:40:49] E [rpc-socket.c:129:rpcsvc_socket_listen] rpc-socket: Port is already in use
+ [2010-05-26 23:40:49] E [rpcsvc.c:2636:rpcsvc_stage_program_register] rpc-service: could not create listening connection
+ [2010-05-26 23:40:49] E [rpcsvc.c:2675:rpcsvc_program_register] rpc-service: stage registration of program failed
+ [2010-05-26 23:40:49] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
+ [2010-05-26 23:40:49] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
+ [2010-05-26 23:40:49] C [nfs.c:531:notify] nfs: Failed to initialize protocols
+
+To resolve this error one of the Gluster NFS servers will have to be
+shut down. At this time, Gluster NFS server does not support running
+multiple NFS servers on the same machine.
+
+mount command fails with “rpc.statd” related error message
+----------------------------------------------------------
+
+If the mount command fails with the following error message:
+
+mount.nfs: rpc.statd is not running but is required for remote locking.
+mount.nfs: Either use '-o nolock' to keep locks local, or start statd.
+
+Start rpc.statd
+
+For NFS clients to mount the NFS server, rpc.statd service must be
+running on the clients.
+
+Start rpc.statd service by running the following command:
+
+`$ rpc.statd `
+
+mount command takes too long to finish.
+---------------------------------------
+
+Start rpcbind service on the NFS client.
+
+The problem is that the rpcbind or portmap service is not running on the
+NFS client. The resolution for this is to start either of these services
+by running the following command:
+
+`$ /etc/init.d/portmap start`
+
+On some distributions where portmap has been replaced by rpcbind, the
+following command is required:
+
+`$ /etc/init.d/rpcbind start`
+
+NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed” error message in the log.
+----------------------------------------------------------------------------------------------------------------------------------------------
+
+NFS start-up can succeed but the initialization of the NFS service can
+still fail preventing clients from accessing the mount points. Such a
+situation can be confirmed from the following error messages in the log
+file:
+
+ [2010-05-26 23:33:47] E [rpcsvc.c:2598:rpcsvc_program_register_portmap] rpc-service: Could not register with portmap
+ [2010-05-26 23:33:47] E [rpcsvc.c:2682:rpcsvc_program_register] rpc-service: portmap registration of program failed
+ [2010-05-26 23:33:47] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
+ [2010-05-26 23:33:47] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
+ [2010-05-26 23:33:47] C [nfs.c:531:notify] nfs: Failed to initialize protocols
+ [2010-05-26 23:33:49] E [rpcsvc.c:2614:rpcsvc_program_unregister_portmap] rpc-service: Could not unregister with portmap
+ [2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed
+ [2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
+
+1. Start portmap or rpcbind service on the NFS server.
+
+ On most Linux distributions, portmap can be started using the
+ following command:
+
+ `$ /etc/init.d/portmap start `
+
+ On some distributions where portmap has been replaced by rpcbind,
+ run the following command:
+
+ `$ /etc/init.d/rpcbind start `
+
+ After starting portmap or rpcbind, gluster NFS server needs to be
+ restarted.
+
+2. Stop another NFS server running on the same machine.
+
+ Such an error is also seen when there is another NFS server running
+ on the same machine but it is not the Gluster NFS server. On Linux
+ systems, this could be the kernel NFS server. Resolution involves
+ stopping the other NFS server or not running the Gluster NFS server
+ on the machine. Before stopping the kernel NFS server, ensure that
+ no critical service depends on access to that NFS server's exports.
+
+ On Linux, kernel NFS servers can be stopped by using either of the
+ following commands depending on the distribution in use:
+
+ `$ /etc/init.d/nfs-kernel-server stop`
+
+ `$ /etc/init.d/nfs stop`
+
+3. Restart Gluster NFS server.
+
+mount command fails with NFS server failed error.
+-------------------------------------------------
+
+The mount command fails with the following error:
+
+*mount: mount to NFS server '10.1.10.11' failed: timed out (retrying).*
+
+Perform one of the following to resolve this issue:
+
+1. Disable name lookup requests from NFS server to a DNS server.
+
+ The NFS server attempts to authenticate NFS clients by performing a
+ reverse DNS lookup to match hostnames in the volume file with the
+ client IP addresses. There can be a situation where the NFS server
+ either is not able to connect to the DNS server or the DNS server is
+ taking too long to respond to DNS requests. These delays can result
+ in delayed replies from the NFS server to the NFS client resulting
+ in the timeout error seen above.
+
+ NFS server provides a work-around that disables DNS requests,
+ instead relying only on the client IP addresses for authentication.
+ The following option can be added for successful mounting in such
+ situations:
+
+ `option rpc-auth.addr.namelookup off `
+
+ > **Note**
+ >
+ > Remember that disabling name lookup forces the NFS server to
+ > authenticate clients using only IP addresses; if the authentication
+ > rules in the volume file use hostnames, those authentication rules
+ > will fail and disallow mounting for those clients.
+
+ or
+
+2. NFS version used by the NFS client is other than version 3.
+
+ Gluster NFS server supports version 3 of NFS protocol. In recent
+ Linux kernels, the default NFS version has been changed from 3 to 4.
+ It is possible that the client machine is unable to connect to the
+ Gluster NFS server because it is using version 4 messages which are
+ not understood by Gluster NFS server. The timeout can be resolved by
+ forcing the NFS client to use version 3. The **vers** option to
+ mount command is used for this purpose:
+
+ `$ mount -o vers=3 `
+
+showmount fails with clnt\_create: RPC: Unable to receive
+---------------------------------------------------------
+
+Check your firewall settings to open port 111 for portmap
+requests/replies and Gluster NFS server requests/replies. Gluster NFS
+server operates over the following port numbers: 38465, 38466, and
+38467.
+
+For more information, see ?.
+
+Application fails with "Invalid argument" or "Value too large for defined data type" error.
+-------------------------------------------------------------------------------------------
+
+These two errors generally happen for 32-bit nfs clients or applications
+that do not support 64-bit inode numbers or large files. Use the
+following option from the CLI to make Gluster NFS return 32-bit inode
+numbers instead: nfs.enable-ino32 \<on|off\>
+
+Applications that will benefit are those that were either:
+
+- built 32-bit and run on 32-bit machines such that they do not
+ support large files by default
+
+- built 32-bit on 64-bit systems
+
+This option is disabled by default so NFS returns 64-bit inode numbers
+by default.
+
+Applications which can be rebuilt from source are recommended to rebuild
+using the following flag with gcc:
+
+` -D_FILE_OFFSET_BITS=64`
+
+Troubleshooting File Locks
+==========================
+
+In GlusterFS 3.3 you can use `statedump` command to list the locks held
+on files. The statedump output also provides information on each lock
+with its range, basename, PID of the application holding the lock, and
+so on. You can analyze the output to know about the locks whose
+owner/application is no longer running or interested in that lock. After
+ensuring that no application is using the file, you can clear the
+lock using the following `clear lock` command:
+
+`# `
+
+For more information on performing `statedump`, see ?
+
+**To identify locked file and clear locks**
+
+1. Perform statedump on the volume to view the files that are locked
+ using the following command:
+
+ `# gluster volume statedump inode`
+
+ For example, to display statedump of test-volume:
+
+ # gluster volume statedump test-volume
+ Volume statedump successful
+
+ The statedump files are created on the brick servers in the `/tmp`
+ directory or in the directory set using `server.statedump-path`
+ volume option. The naming convention of the dump file is
+ `<brick-path>.<brick-pid>.dump`.
+
+ The following are the sample contents of the statedump file. It
+ indicates that GlusterFS has entered into a state where there is an
+ entry lock (entrylk) and an inode lock (inodelk). Ensure that those
+ are stale locks and no resources own them.
+
+ [xlator.features.locks.vol-locks.inode]
+ path=/
+ mandatory=0
+ entrylk-count=1
+ lock-dump.domain.domain=vol-replicate-0
+ xlator.feature.locks.lock-dump.domain.entrylk.entrylk[0](ACTIVE)=type=ENTRYLK_WRLCK on basename=file1, pid = 714782904, owner=ffffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
+
+ conn.2.bound_xl./gfs/brick1.hashsize=14057
+ conn.2.bound_xl./gfs/brick1.name=/gfs/brick1/inode
+ conn.2.bound_xl./gfs/brick1.lru_limit=16384
+ conn.2.bound_xl./gfs/brick1.active_size=2
+ conn.2.bound_xl./gfs/brick1.lru_size=0
+ conn.2.bound_xl./gfs/brick1.purge_size=0
+
+ [conn.2.bound_xl./gfs/brick1.active.1]
+ gfid=538a3d4a-01b0-4d03-9dc9-843cd8704d07
+ nlookup=1
+ ref=2
+ ia_type=1
+ [xlator.features.locks.vol-locks.inode]
+ path=/file1
+ mandatory=0
+ inodelk-count=1
+ lock-dump.domain.domain=vol-replicate-0
+ inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
+
+2. Clear the lock using the following command:
+
+ `# `
+
+ For example, to clear the entry lock on `file1` of test-volume:
+
+ # gluster volume clear-locks test-volume / kind granted entry file1
+ Volume clear-locks successful
+ vol-locks: entry blocked locks=0 granted locks=1
+
+3. Clear the inode lock using the following command:
+
+ `# `
+
+ For example, to clear the inode lock on `file1` of test-volume:
+
+ # gluster volume clear-locks test-volume /file1 kind granted inode 0,0-0
+ Volume clear-locks successful
+ vol-locks: inode blocked locks=0 granted locks=1
+
+ You can perform statedump on test-volume again to verify that the
+ above inode and entry locks are cleared.
+
+
diff --git a/doc/admin-guide/en-US/markdown/gfs_introduction.md b/doc/admin-guide/en-US/markdown/gfs_introduction.md
new file mode 100644
index 000000000..fd2c53dc9
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/gfs_introduction.md
@@ -0,0 +1,50 @@
+Introducing Gluster File System
+===============================
+
+GlusterFS is an open source, clustered file system capable of scaling to
+several petabytes and handling thousands of clients. GlusterFS can be
+flexibly combined with commodity physical, virtual, and cloud resources
+to deliver highly available and performant enterprise storage at a
+fraction of the cost of traditional solutions.
+
+GlusterFS clusters together storage building blocks over Infiniband RDMA
+and/or TCP/IP interconnect, aggregating disk and memory resources and
+managing data in a single global namespace. GlusterFS is based on a
+stackable user space design, delivering exceptional performance for
+diverse workloads.
+
+![ Virtualized Cloud Environments ][]
+
+GlusterFS is designed for today's high-performance, virtualized cloud
+environments. Unlike traditional data centers, cloud environments
+require multi-tenancy along with the ability to grow or shrink resources
+on demand. Enterprises can scale capacity, performance, and availability
+on demand, with no vendor lock-in, across on-premise, public cloud, and
+hybrid environments.
+
+GlusterFS is in production at thousands of enterprises spanning media,
+healthcare, government, education, web 2.0, and financial services. The
+following table lists the commercial offerings and their documentation
+locations:
+
+ ------------------------------------------------------------------------
+ Product Documentation Location
+ ----------- ------------------------------------------------------------
+ Red Hat [][]
+ Storage
+ Software
+ Appliance
+
+ Red Hat [][1]
+ Virtual
+ Storage
+ Appliance
+
+ Red Hat [][2]
+ Storage
+ ------------------------------------------------------------------------
+
+ [ Virtualized Cloud Environments ]: images/640px-GlusterFS_Architecture.png
+ []: http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html
+ [1]: http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html
+ [2]: http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html
diff --git a/doc/admin-guide/en-US/markdown/glossary.md b/doc/admin-guide/en-US/markdown/glossary.md
new file mode 100644
index 000000000..0febaff8f
--- /dev/null
+++ b/doc/admin-guide/en-US/markdown/glossary.md
@@ -0,0 +1,134 @@
+Glossary
+========
+
+Brick
+: A Brick is the GlusterFS basic unit of storage, represented by an
+ export directory on a server in the trusted storage pool. A Brick is
+ expressed by combining a server with an export directory in the
+ following format:
+
+ `SERVER:EXPORT`
+
+ For example:
+
+ `myhostname:/exports/myexportdir/`
+
+Cluster
+: A cluster is a group of linked computers, working together closely
+ thus in many respects forming a single computer.
+
+Distributed File System
+: A file system that allows multiple clients to concurrently access
+ data over a computer network.
+
+Filesystem
+: A method of storing and organizing computer files and their data.
+ Essentially, it organizes these files into a database for the
+ storage, organization, manipulation, and retrieval by the computer's
+ operating system.
+
+ Source: [Wikipedia][]
+
+FUSE
+: Filesystem in Userspace (FUSE) is a loadable kernel module for
+ Unix-like computer operating systems that lets non-privileged users
+ create their own file systems without editing kernel code. This is
+ achieved by running file system code in user space while the FUSE
+ module provides only a "bridge" to the actual kernel interfaces.
+
+ Source: [Wikipedia][1]
+
+Geo-Replication
+: Geo-replication provides a continuous, asynchronous, and incremental
+ replication service from one site to another over Local Area Networks
+ (LAN), Wide Area Network (WAN), and across the Internet.
+
+glusterd
+: The Gluster management daemon that needs to run on all servers in
+ the trusted storage pool.
+
+Metadata
+: Metadata is data providing information about one or more other
+ pieces of data.
+
+Namespace
+: Namespace is an abstract container or environment created to hold a
+ logical grouping of unique identifiers or symbols. Each Gluster
+ volume exposes a single namespace as a POSIX mount point that
+ contains every file in the cluster.
+
+Open Source
+: Open source describes practices in production and development that
+ promote access to the end product's source materials. Some consider
+ open source a philosophy, others consider it a pragmatic
+ methodology.
+
+ Before the term open source became widely adopted, developers and
+ producers used a variety of phrases to describe the concept; open
+ source gained hold with the rise of the Internet, and the attendant
+ need for massive retooling of the computing source code.
+
+ Opening the source code enabled a self-enhancing diversity of
+ production models, communication paths, and interactive communities.
+ Subsequently, a new, three-word phrase "open source software" was
+ born to describe the environment that the new copyright, licensing,
+ domain, and consumer issues created.
+
+ Source: [Wikipedia][2]
+
+Petabyte
+: A petabyte (derived from the SI prefix peta- ) is a unit of
+ information equal to one quadrillion (short scale) bytes, or 1000
+ terabytes. The unit symbol for the petabyte is PB. The prefix peta-
+ (P) indicates a power of 1000:
+
+ 1 PB = 1,000,000,000,000,000 B = 1000^5 B = 10^15 B.
+
+ The term "pebibyte" (PiB), using a binary prefix, is used for the
+ corresponding power of 1024.
+
+ Source: [Wikipedia][3]
+
+POSIX
+: Portable Operating System Interface (for Unix) is the name of a
+ family of related standards specified by the IEEE to define the
+ application programming interface (API), along with shell and
+ utilities interfaces for software compatible with variants of the
+ Unix operating system. Gluster exports a fully POSIX compliant file
+ system.
+
+RAID
+: Redundant Array of Inexpensive Disks (RAID) is a technology that
+ provides increased storage reliability through redundancy, combining
+ multiple low-cost, less-reliable disk drive components into a
+ logical unit where all drives in the array are interdependent.
+
+RRDNS
+: Round Robin Domain Name Service (RRDNS) is a method to distribute
+ load across application servers. RRDNS is implemented by creating
+ multiple A records with the same name and different IP addresses in
+ the zone file of a DNS server.
+
+Trusted Storage Pool
+: A storage pool is a trusted network of storage servers. When you
+ start the first server, the storage pool consists of that server
+ alone.
+
+Userspace
+: Applications running in user space don’t directly interact with
+ hardware, instead using the kernel to moderate access. Userspace
+ applications are generally more portable than applications in kernel
+ space. Gluster is a user space application.
+
+Volfile
+: Volfile is a configuration file used by the glusterfs process. A
+ volfile is usually located at `/var/lib/glusterd/vols/VOLNAME`.
+
+Volume
+: A volume is a logical collection of bricks. Most of the gluster
+ management operations happen on the volume.
+
+ [Wikipedia]: http://en.wikipedia.org/wiki/Filesystem
+ [1]: http://en.wikipedia.org/wiki/Filesystem_in_Userspace
+ [2]: http://en.wikipedia.org/wiki/Open_source
+ [3]: http://en.wikipedia.org/wiki/Petabyte
diff --git a/doc/admin-guide/publican.cfg b/doc/admin-guide/publican.cfg
deleted file mode 100644
index e42fa1b3d..000000000
--- a/doc/admin-guide/publican.cfg
+++ /dev/null
@@ -1,12 +0,0 @@
-# Config::Simple 4.59
-# Thu Apr 5 11:09:15 2012
-
-xml_lang: "en-US"
-type: Book
-brand: Gluster_Brand
-prod_url: http://www.gluster.org
-doc_url: http://www.gluster.com/community/documentation/index.php/Main_Page
-condition: gfs
-show_remarks: 1
-
-
diff --git a/doc/legacy/authentication.txt b/doc/authentication.txt
index 73cb21d73..73cb21d73 100644
--- a/doc/legacy/authentication.txt
+++ b/doc/authentication.txt
diff --git a/doc/legacy/coding-standard.pdf b/doc/coding-standard.pdf
index bc9cb5620..bc9cb5620 100644
--- a/doc/legacy/coding-standard.pdf
+++ b/doc/coding-standard.pdf
Binary files differ
diff --git a/doc/legacy/coding-standard.tex b/doc/coding-standard.tex
index abaedb69c..30d412a91 100644
--- a/doc/legacy/coding-standard.tex
+++ b/doc/coding-standard.tex
@@ -370,8 +370,7 @@ sample_fop (call_frame_t *frame, xlator_t *this, ...)
}
if (pfd) {
- if (pfd->path)
- FREE (pfd->path);
+ FREE (pfd->path);
FREE (pfd);
pfd = NULL;
}
diff --git a/doc/examples/legacy/Makefile.am b/doc/examples/legacy/Makefile.am
deleted file mode 100644
index 49c9701ef..000000000
--- a/doc/examples/legacy/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-EXTRA = README replicate.vol stripe.vol protocol-client.vol protocol-server.vol posix-locks.vol trash.vol write-behind.vol io-threads.vol io-cache.vol read-ahead.vol filter.vol trace.vol
-EXTRA_DIST = $(EXTRA)
-
-docdir = $(datadir)/doc/$(PACKAGE_NAME)
-Examplesdir = $(docdir)/examples
-Examples_DATA = $(EXTRA)
-
-CLEANFILES =
diff --git a/doc/examples/legacy/README b/doc/examples/legacy/README
deleted file mode 100644
index 732751571..000000000
--- a/doc/examples/legacy/README
+++ /dev/null
@@ -1,13 +0,0 @@
-GlusterFS's translator feature is very flexible and there are quite a lot of
-ways one can configure their filesystem to behave like.
-
-Volume Specification is a way in which GlusterFS understands how it has to work,
-based on what is written there.
-
-Going through the following URLs may give you more idea about all these.
-
-* http://www.gluster.org/docs/index.php/GlusterFS
-* http://www.gluster.org/docs/index.php/GlusterFS_Volume_Specification
-* http://www.gluster.org/docs/index.php/GlusterFS_Translators
-
-Mail us any doubts, suggestions on 'gluster-devel(at)nongnu.org'
diff --git a/doc/examples/legacy/filter.vol b/doc/examples/legacy/filter.vol
deleted file mode 100644
index 59bb23ecf..000000000
--- a/doc/examples/legacy/filter.vol
+++ /dev/null
@@ -1,23 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, afr, or unify over afr)
-#
-
-### 'Filter' translator is used on client side (or server side according to needs). This traslator makes all the below translators, (or say volumes) as read-only. Hence if one wants a 'read-only' filesystem, using filter as the top most volume will make it really fast as the fops are returned from this level itself.
-
-volume filter-ro
- type features/filter
- option root-squashing enable
-# option completely-read-only yes
-# translate-uid 1-99=0
- subvolumes client
-end-volume
diff --git a/doc/examples/legacy/io-cache.vol b/doc/examples/legacy/io-cache.vol
deleted file mode 100644
index a71745017..000000000
--- a/doc/examples/legacy/io-cache.vol
+++ /dev/null
@@ -1,31 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come
-# here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be distribute, replicate, or distribute
-# over replicate)
-#
-
-### 'IO-Cache' translator is best used on client side when a filesystem has file
-# which are not modified frequently but read several times. For example, while
-# compiling a kernel, *.h files are read while compiling every *.c file, in
-# these case, io-cache translator comes very handy, as it keeps the whole file
-# content in the cache, and serves from the cache.
-# One can provide the priority of the cache too.
-
-volume ioc
- type performance/io-cache
- subvolumes client # In this example it is 'client' you may have to
- # change it according to your spec file.
- option cache-size 64MB # 32MB is default
- option force-revalidate-timeout 5 # 1second is default
- option priority *.html:2,*:1 # default is *:0
-end-volume
diff --git a/doc/examples/legacy/io-threads.vol b/doc/examples/legacy/io-threads.vol
deleted file mode 100644
index 236f5b8b1..000000000
--- a/doc/examples/legacy/io-threads.vol
+++ /dev/null
@@ -1,22 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'IO-threads' translator gives a threading behaviour to File I/O calls. All
-# other normal fops are having default behaviour. Loading this on server side
-# helps to reduce the contension of network. (Which is assumed as a GlusterFS
-# hang).
-
-volume iot
- type performance/io-threads
- subvolumes brick
- option thread-count 4 # default value is 1
-end-volume
-
-volume server
- type protocol/server
- subvolumes iot
- option transport-type tcp # For TCP/IP transport
- option auth.addr.iot.allow 192.168.*
-end-volume
diff --git a/doc/examples/legacy/posix-locks.vol b/doc/examples/legacy/posix-locks.vol
deleted file mode 100644
index 673afa3f8..000000000
--- a/doc/examples/legacy/posix-locks.vol
+++ /dev/null
@@ -1,19 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-# 'Posix-locks' feature should be added on the server side.
-
-volume p-locks
- type features/posix-locks
- subvolumes brick
- option mandatory on
-end-volume
-
-volume server
- type protocol/server
- subvolumes p-locks
- option transport-type tcp
- option auth.addr.p-locks.allow 192.168.* # Allow access to "p-locks" volume
-end-volume
diff --git a/doc/examples/legacy/protocol-client.vol b/doc/examples/legacy/protocol-client.vol
deleted file mode 100644
index c34ef790d..000000000
--- a/doc/examples/legacy/protocol-client.vol
+++ /dev/null
@@ -1,12 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
-# option transport.socket.remote-port 24016
-
-# option transport-type rdma # for Infiniband verbs transport
-# option transport.rdma.work-request-send-count 16
-# option transport.rdma.work-request-recv-count 16
-# option transport.rdma.remote-port 24016
- option remote-subvolume brick # name of the remote volume
-end-volume
diff --git a/doc/examples/legacy/protocol-server.vol b/doc/examples/legacy/protocol-server.vol
deleted file mode 100644
index 195e49657..000000000
--- a/doc/examples/legacy/protocol-server.vol
+++ /dev/null
@@ -1,21 +0,0 @@
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
-# option transport.socket.listen-port 24016
-
-# option transport-type rdma
-# option transport.rdma.work-request-send-count 64
-# option transport.rdma.work-request-recv-count 64
-# option transport.rdma.listen-port 24016
-
-# option bind-address 192.168.1.10 # Default is to listen on all interfaces
- subvolumes brick
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
-end-volume
diff --git a/doc/examples/legacy/read-ahead.vol b/doc/examples/legacy/read-ahead.vol
deleted file mode 100644
index 9e4dba556..000000000
--- a/doc/examples/legacy/read-ahead.vol
+++ /dev/null
@@ -1,24 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be distribute, replicate, or distribute
-# over replicate)
-#
-
-# 'Read-Ahead' translator is best utilized on client side, as it prefetches
-# the file contents when the first read() call is issued.
-
-volume ra
- type performance/read-ahead
- subvolumes client
- option page-count 4 # default is 2
- option force-atime-update no # defalut is 'no'
-end-volume
diff --git a/doc/examples/legacy/replicate.vol b/doc/examples/legacy/replicate.vol
deleted file mode 100644
index 10626d46f..000000000
--- a/doc/examples/legacy/replicate.vol
+++ /dev/null
@@ -1,118 +0,0 @@
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of
-# replicate spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24016
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24017
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24018
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24016
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24017
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24018
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add replicate feature.
-volume replicate
- type cluster/replicate
- subvolumes client1 client2 client3
-end-volume
diff --git a/doc/examples/legacy/stripe.vol b/doc/examples/legacy/stripe.vol
deleted file mode 100644
index 9524e8198..000000000
--- a/doc/examples/legacy/stripe.vol
+++ /dev/null
@@ -1,120 +0,0 @@
-
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of
-# stripe's spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24016
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24017
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24018
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24016
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24017
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24018
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add Stripe Feature.
-volume stripe
- type cluster/stripe
- subvolumes client1 client2 client3
- option block-size 1MB
-end-volume
diff --git a/doc/examples/legacy/trace.vol b/doc/examples/legacy/trace.vol
deleted file mode 100644
index 59830f26a..000000000
--- a/doc/examples/legacy/trace.vol
+++ /dev/null
@@ -1,21 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### 'Trace' translator is a very handy debug tool for GlusterFS, as it can be
-# loaded between any of the two volumes without changing the behaviour of the
-# filesystem.
-# On client side it can be the top most volume in spec (like now) to understand
-# what calls are made on FUSE filesystem, when a mounted filesystem is
-# accessed.
-
-volume trace
- type debug/trace
- subvolumes client
-end-volume
-
-# 'NOTE:' By loading 'debug/trace' translator, filesystem will be very slow as
-# it logs each and every calls to the log file.
diff --git a/doc/examples/legacy/trash.vol b/doc/examples/legacy/trash.vol
deleted file mode 100644
index 3fcf315af..000000000
--- a/doc/examples/legacy/trash.vol
+++ /dev/null
@@ -1,20 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'Trash' translator is best used on server side as it just renames the
-# deleted file inside 'trash-dir', and it makes 4 seperate fops for one unlink
-# call.
-volume trashcan
- type features/trash
- subvolumes brick
- option trash-dir /.trashcan
-end-volume
-
-volume server
- type protocol/server
- subvolumes trashcan
- option transport-type tcp # For TCP/IP transport
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
-end-volume
diff --git a/doc/examples/legacy/write-behind.vol b/doc/examples/legacy/write-behind.vol
deleted file mode 100644
index 2b5ed4139..000000000
--- a/doc/examples/legacy/write-behind.vol
+++ /dev/null
@@ -1,27 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, replicate, or unify over replicate)
-#
-
-
-# 'Write-behind' translator is a performance booster for write operation. Best
-# used on client side, as its main intension is to reduce the network latency
-# caused for each write operation.
-
-volume wb
- type performance/write-behind
- subvolumes client
- option flush-behind on # default value is 'off'
- option window-size 2MB
- option enable-O_SYNC no # default is no
- option disable-for-first-nbytes 128KB #default is 1
-end-volume
diff --git a/doc/features/rdma-cm-in-3.4.0.txt b/doc/features/rdma-cm-in-3.4.0.txt
new file mode 100644
index 000000000..fd953e56b
--- /dev/null
+++ b/doc/features/rdma-cm-in-3.4.0.txt
@@ -0,0 +1,9 @@
+Following is the impact of http://review.gluster.org/#change,149.
+
+New userspace packages needed:
+librdmacm
+librdmacm-devel
+
+rdmacm needs an IPoIB address for connection establishment. This requirement results in the following issues:
+* Because of bug #890502, we have to probe the peer on an IPoIB address. This imposes a restriction that all volumes created in the future have to communicate over an IPoIB address (irrespective of whether they use gluster's tcp or rdma transport).
+* Currently the client is free to choose between the tcp and rdma transports while communicating with the server (by creating volumes with transport-type tcp,rdma). This freedom was a byproduct of our ability to use the normal channel used with transport-type tcp for the rdma connection establishment handshake too. However, with the new requirement of an IPoIB address for connection establishment, we lose this freedom (until we bring multi-network support into glusterd, where a brick can be identified by a set of IP addresses and we can choose different pairs of IP addresses for communication based on our requirements).
diff --git a/doc/features/rebalance.md b/doc/features/rebalance.md
new file mode 100644
index 000000000..29b993008
--- /dev/null
+++ b/doc/features/rebalance.md
@@ -0,0 +1,74 @@
+## Background
+
+
+For a more detailed description, view Jeff Darcy's blog post
+[here](http://hekafs.org/index.php/2012/03/glusterfs-algorithms-distribution/).
+
+GlusterFS uses the distribute translator (DHT) to aggregate the space of multiple servers. DHT distributes files among its subvolumes using a consistent hashing method providing 32-bit hashes. Each DHT subvolume is given a range in the 32-bit hash space. A hash value is calculated for every file from its name. The file is then placed in the subvolume with the hash range that contains the hash value.
+
+## What is rebalance?
+
+The rebalance process migrates files between the DHT subvolumes when necessary.
+
+## When is rebalance required?
+
+Rebalancing is required for two main cases.
+
+1. Addition/Removal of bricks
+
+2. Renaming of a file
+
+## Addition/Removal of bricks
+
+Whenever the number or order of DHT subvolumes changes, the hash range given to each subvolume is recalculated. When this happens, already existing files on the volume will need to be moved to the correct subvolume based on their hash. Rebalance performs this migration.
+
+Addition of bricks which increase the size of a volume will increase the number of DHT subvolumes and lead to recalculation of hash ranges (This doesn't happen when bricks are added to a volume to increase redundancy, i.e. increase replica count of a volume). This will require an explicit rebalance command to be issued to migrate the files.
+
+Removal of bricks which decrease the size of a volume also causes the hash ranges of DHT to be recalculated. But we don't need to issue an explicit rebalance command in this case, as rebalance is done automatically by the remove-brick process if needed.
+
+## Renaming of a file
+
+Renaming a file will cause its hash to change. The file now needs to be moved to the correct subvolume based on its new hash. Rebalance does this.
+
+## How does rebalance work?
+
+At a high level, the rebalance process consists of the following 3 steps:
+
+1. Crawl the volume to access all files
+2. Calculate the hash for the file
+3. If needed, migrate the file to the correct subvolume.
+
+
+The rebalance process has been optimized by making it distributed across the trusted storage pool. With distributed rebalance, a rebalance process is launched on each peer in the cluster. Each rebalance process will crawl files on only those bricks of the volume which are present on it, and migrate the files which need migration to the correct brick. This speeds up the rebalance process considerably.
+
+## What will happen if rebalance is not run?
+
+### Addition of bricks
+
+With the current implementation of add-brick, when the size of a volume is augmented by adding new bricks, the new bricks are not put into use immediately, i.e., the hash ranges are not recalculated immediately. This means that the files will still be placed only onto the existing bricks, leaving the newly added storage space unused. Starting a rebalance process on the volume will cause the hash ranges to be recalculated with the new bricks included, which allows the newly added storage space to be used.
+
+### Renaming a file
+
+When a file rename causes the file to be hashed to a new subvolume, DHT writes a link file on the new subvolume leaving the actual file on the original subvolume. A link file is an empty file, which has an extended attribute set that points to the subvolume on which the actual file exists. So, when a client accesses the renamed file, DHT first looks for the file in the hashed subvolume and gets the link file. DHT understands the link file, and gets the actual file from the subvolume pointed to by the link file. This leads to a slight reduction in performance. A rebalance will move the actual file to the hashed subvolume, allowing clients to access the file directly once again.
+
+## Are clients affected during a rebalance process?
+
+The rebalance process is transparent to applications on the clients. Applications which have open files on the volume will not be affected by the rebalance process, even if the open file requires migration. The DHT translator on the client will hide the migration from the applications.
+
+## How are open files migrated?
+
+(A more technical description of the algorithm used can be seen in the commit message of commit a07bb18c8adeb8597f62095c5d1361c5bad01f09.)
+
+To achieve migration of open files, two things must be ensured:
+a) any writes or changes happening to the file during migration are correctly synced to the destination subvolume after the migration is complete;
+b) any further changes are made to the destination subvolume.
+
+Both of these requirements require sending notifications to clients. Clients are notified by overloading an attribute used in every callback function. DHT understands these attributes in the callbacks and can be notified if a file is being migrated or not.
+
+During rebalance, a file will be in two phases
+
+1. Migration in process - In this phase the file is being migrated by the rebalance process from the source subvolume to the destination subvolume. The rebalance process will set an 'in-migration' attribute on the file, which will notify the clients' DHT translator. The clients' DHT translator will then take care to send any further changes to the destination subvolume as well. This way we satisfy the first requirement.
+
+2. Migration completed - Once the file has been migrated, the rebalance process will set a 'migration-complete' attribute on the file. The clients will be notified of the completion and all further operations on the file will happen on the destination subvolume.
+
+The DHT translator handles the above and allows the applications on the clients to continue working on a file under migration.
diff --git a/doc/gluster.8 b/doc/gluster.8
index 87c812081..3c78fb8b1 100644
--- a/doc/gluster.8
+++ b/doc/gluster.8
@@ -1,18 +1,11 @@
-.\" Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" along with this program. If not, see " <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.TH Gluster 8 "Gluster command line utility" "07 March 2011" "Gluster Inc."
@@ -121,7 +114,7 @@ Display the command options.
Exit the gluster command line interface.
.SH FILES
-/etc/glusterd/*
+/var/lib/glusterd/*
.SH SEE ALSO
.nf
\fBfusermount\fR(1), \fBmount.glusterfs\fR(8), \fBglusterfs\fR(8), \fBglusterd\fR(8)
diff --git a/doc/glusterd.8 b/doc/glusterd.8
index 267a7e00f..04a43481e 100644
--- a/doc/glusterd.8
+++ b/doc/glusterd.8
@@ -1,20 +1,11 @@
.\"
-.\" Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" along with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
@@ -59,7 +50,7 @@ Print the program version.
.PP
.SH FILES
-/etc/glusterd/*
+/var/lib/glusterd/*
.SH SEE ALSO
.nf
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8
index 37bf67d18..60ad5709b 100644
--- a/doc/glusterfs.8
+++ b/doc/glusterfs.8
@@ -1,19 +1,10 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.\"
@@ -32,7 +23,8 @@ be made of any commodity hardware, such as x86-64 server with SATA-II RAID and
Infiniband HBA.
GlusterFS is fully POSIX compliant file system. On client side, it has dependency
-on FUSE package, on server side, it works seemlessly on different operating systems. Currently supported on GNU/Linux and Solaris.
+on FUSE package, on server side, it works seamlessly on different operating systems.
+Currently supported on GNU/Linux and Solaris.
.SH OPTIONS
@@ -40,33 +32,55 @@ on FUSE package, on server side, it works seemlessly on different operating syst
.PP
.TP
\fB\-f, \fB\-\-volfile=VOLUME-FILE\fR
-File to use as VOLUME-FILE (the default is /etc/glusterfs/glusterfs.vol).
+File to use as VOLUME-FILE.
.TP
\fB\-l, \fB\-\-log\-file=LOGFILE\fR
-File to use for logging.
+File to use for logging (the default is <INSTALL-DIR>/var/log/glusterfs/<MOUNT-POINT>.log).
.TP
\fB\-L, \fB\-\-log\-level=LOGLEVEL\fR
-Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is WARNING).
+Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
.TP
\fB\-s, \fB\-\-volfile\-server=SERVER\fR
Server to get the volume from. This option overrides \fB\-\-volfile \fR option.
+.TP
+\fB\-\-volfile\-max\-fetch\-attempts=MAX\-ATTEMPTS\fR
+Maximum number of connect attempts to server. This option should be provided with
+\fB\-\-volfile\-server\fR option (the default is 1).
.SS "Advanced options"
.PP
.TP
+\fB\-\-acl\fR
+Mount the filesystem with POSIX ACL support.
+.TP
\fB\-\-debug\fR
Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG,
and \fB\-\-log\-file\fR to console.
.TP
+\fB\-\-enable\-ino32=BOOL\fR
+Use 32-bit inodes when mounting to work around applications that don't support 64-bit inodes.
+.TP
+\fB\-\-fopen\-keep\-cache\fR
+Do not purge the cache on file open.
+.TP
+\fB\-\-mac\-compat=BOOL\fR
+Provide stubs for attributes needed for seamless operation on Macs (the default is off).
+.TP
\fB\-N, \fB\-\-no\-daemon\fR
Run in the foreground.
.TP
-\fB\-\-read\-only\fR
-Make the file system read-only.
-.TP
\fB\-p, \fB\-\-pid\-file=PIDFILE\fR
File to use as PID file.
.TP
+\fB\-\-read\-only\fR
+Mount the file system in 'read-only' mode.
+.TP
+\fB\-\-selinux\fR
+Enable SELinux label (extended attributes) support on inodes.
+.TP
+\fB\-S, \fB\-\-socket\-file=SOCKFILE\fR
+File to use as unix-socket.
+.TP
\fB\-\-volfile\-id=KEY\fR
Key of the volume file to be fetched from the server.
.TP
@@ -74,11 +88,14 @@ Key of the volume file to be fetched from the server.
Port number of volfile server.
.TP
\fB\-\-volfile\-server\-transport=TRANSPORT\fR
-Transport type to get volume file from server (the default is socket).
+Transport type to get volume file from server (the default is tcp).
.TP
\fB\-\-volume\-name=VOLUME\-NAME\fR
Volume name to be used for MOUNT-POINT (the default is top most volume in VOLUME-FILE).
.TP
+\fB\-\-worm\fR
+Mount the filesystem in 'worm' mode.
+.TP
\fB\-\-xlator\-option=VOLUME\-NAME.OPTION=VALUE\fR
Add/Override a translator option for a volume with the specified value.
@@ -89,11 +106,29 @@ Add/Override a translator option for a volume with the specified value.
\fB\-\-attribute\-timeout=SECONDS\fR
Set attribute timeout to SECONDS for inodes in fuse kernel module (the default is 1).
.TP
-\fB\-\-entry\-timeout=SECONDS\fR
-Set entry timeout to SECONDS in fuse kernel module (the default is 1).
+\fB\-\-background\-qlen=N\fR
+Set fuse module's background queue length to N (the default is 64).
+.TP
+\fB\-\-congestion\-threshold=N\fR
+Set fuse module's congestion threshold to N (the default is 48).
.TP
\fB\-\-direct\-io\-mode=BOOL\fR
Enable/Disable the direct-I/O mode in fuse module (the default is enable).
+.TP
+\fB\-\-dump-fuse=PATH\fR
+Dump fuse traffic to PATH
+.TP
+\fB\-\-entry\-timeout=SECONDS\fR
+Set entry timeout to SECONDS in fuse kernel module (the default is 1).
+.TP
+\fB\-\-gid\-timeout=SECONDS\fR
+Set auxiliary group list timeout to SECONDS for fuse translator (the default is 0).
+.TP
+\fB\-\-negative\-timeout=SECONDS\fR
+Set negative timeout to SECONDS in fuse kernel module (the default is 0).
+.TP
+\fB\-\-volfile-check\fR
+Enable strict volume file checking.
.SS "Miscellaneous Options"
.PP
@@ -110,7 +145,7 @@ Print the program version.
.PP
.SH FILES
-/etc/glusterfs/*.vol, /etc/glusterd/vols/*/*.vol
+/var/lib/glusterd/vols/*/*.vol
.SH EXAMPLES
mount a volume named foo on server bar with log level DEBUG on mount point
/mnt/foo
@@ -124,6 +159,6 @@ mount a volume named foo on server bar with log level DEBUG on mount point
.fi
.SH COPYRIGHT
.nf
-Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+Copyright(c) 2006-2011 Red Hat, Inc. <http://www.redhat.com>
\fR
.fi
diff --git a/doc/glusterfs.vol.sample b/doc/glusterfs.vol.sample
deleted file mode 100644
index 977363b92..000000000
--- a/doc/glusterfs.vol.sample
+++ /dev/null
@@ -1,53 +0,0 @@
-### file: client-volume.vol.sample
-
-#####################################
-### GlusterFS Client Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Add client feature and attach to remote subvolume
-volume client
- type protocol/client
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
- option remote-host 127.0.0.1 # IP address of the remote brick
-# option transport.socket.remote-port 24016
-
-# option transport-type rdma
-# option transport.rdma.remote-port 24016
-# option transport.rdma.work-request-send-count 16
-# option transport.rdma.work-request-recv-count 16
-
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### Add readahead feature
-#volume readahead
-# type performance/read-ahead
-# option page-count 2 # cache per file = (page-count x page-size)
-# subvolumes client
-#end-volume
-
-### Add IO-Cache feature
-#volume iocache
-# type performance/io-cache
-# subvolumes readahead
-#end-volume
-
-### Add writeback feature
-#volume writeback
-# type performance/write-behind
-# option window-size 2MB
-# option flush-behind off
-# subvolumes iocache
-#end-volume
diff --git a/doc/glusterfsd.8 b/doc/glusterfsd.8
index 17d053a5c..176d04236 100644
--- a/doc/glusterfsd.8
+++ b/doc/glusterfsd.8
@@ -1,19 +1,10 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.\"
@@ -89,7 +80,7 @@ KEY of the volume file to be fetched from server
Port number of volfile server
.TP
\fB\-\-volfile\-server\-transport=TRANSPORT\fR
-Transport type to get volume file from server [default: socket]
+Transport type to get volume file from server [default: tcp]
.TP
\fB\-\-volume\-name=VOLUME\-NAME\fR
Volume name to be used for MOUNT-POINT [default: top most volume in
@@ -131,7 +122,7 @@ Print program version
.SH EXAMPLES
Start a GlusterFS server on localhost with volume name foo
-glusterfsd \-s localhost \-\-volfile\-id foo.server.media-disk\-1 \-p /etc/glusterd/vols/foo/run/server\-media\-disk\-1.pid \-S /tmp/<uniqueid>.socket \-\-brick-name /media/disk\-1 \-l /var/log/glusterfs/bricks/media\-disk\-1.log \-\-brick\-port 24009 \-\-xlator\-option foo\-server.listen-port=24009
+glusterfsd \-s localhost \-\-volfile\-id foo.server.media-disk\-1 \-p /var/lib/glusterd/vols/foo/run/server\-media\-disk\-1.pid \-S /tmp/<uniqueid>.socket \-\-brick-name /media/disk\-1 \-l /var/log/glusterfs/bricks/media\-disk\-1.log \-\-brick\-port 24009 \-\-xlator\-option foo\-server.listen-port=24009
.SH SEE ALSO
.nf
diff --git a/doc/glusterfsd.vol.sample b/doc/glusterfsd.vol.sample
deleted file mode 100644
index ec2fd341e..000000000
--- a/doc/glusterfsd.vol.sample
+++ /dev/null
@@ -1,44 +0,0 @@
-### file: server-volume.vol.sample
-
-#####################################
-### GlusterFS Server Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Multiple values to options will be : delimited.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
-# option transport.socket.bind-address 192.168.1.10 # Default is to listen
- # on all interfaces
-# option transport.socket.listen-port 24016
-
-# option transport-type rdma
-# option transport.rdma.listen-port 24016
-# option transport.rdma.work-request-send-count 64
-# option transport.rdma.work-request-recv-count 64
-
- subvolumes brick
-# NOTE: Access to any volume through protocol/server is denied by
-# default. You need to explicitly grant access through # "auth"
-# option.
- option auth.addr.brick.allow * # Allow access to "brick" volume
-end-volume
diff --git a/doc/hacker-guide/en-US/markdown/adding-fops.md b/doc/hacker-guide/en-US/markdown/adding-fops.md
new file mode 100644
index 000000000..3f72ed3e2
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/adding-fops.md
@@ -0,0 +1,18 @@
+Adding a new FOP
+================
+
+Steps to be followed when adding a new FOP to GlusterFS:
+
+1. Edit `glusterfs.h` and add a `GF_FOP_*` constant.
+2. Edit `xlator.[ch]` and:
+ * add the new prototype for fop and callback.
+ * edit `xlator_fops` structure.
+3. Edit `xlator.c` and add to fill_defaults.
+4. Edit `protocol.h` and add struct necessary for the new FOP.
+5. Edit `defaults.[ch]` and provide default implementation.
+6. Edit `call-stub.[ch]` and provide stub implementation.
+7. Edit `common-utils.c` and add to gf_global_variable_init().
+8. Edit client-protocol and add your FOP.
+9. Edit server-protocol and add your FOP.
+10. Implement your FOP in any translator for which the default implementation
+ is not sufficient.
diff --git a/doc/hacker-guide/en-US/markdown/afr.md b/doc/hacker-guide/en-US/markdown/afr.md
new file mode 100644
index 000000000..1be7e39f2
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/afr.md
@@ -0,0 +1,191 @@
+cluster/afr translator
+======================
+
+Locking
+-------
+
+Before understanding replicate, one must understand two internal FOPs:
+
+### `GF_FILE_LK`
+
+This is exactly like `fcntl(2)` locking, except the locks are in a
+separate domain from locks held by applications.
+
+### `GF_DIR_LK (loc_t *loc, char *basename)`
+
+This allows one to lock a name under a directory. For example,
+to lock /mnt/glusterfs/foo, one would use the call:
+
+```
+GF_DIR_LK ({loc_t for "/mnt/glusterfs"}, "foo")
+```
+
+If one wishes to lock *all* the names under a particular directory,
+supply the basename argument as `NULL`.
+
+The locks can either be read locks or write locks; consult the
+function prototype for more details.
+
+Both these operations are implemented by the features/locks (earlier
+known as posix-locks) translator.
+
+Basic design
+------------
+
+All FOPs can be classified into four major groups:
+
+### inode-read
+
+Operations that read an inode's data (file contents) or metadata (perms, etc.).
+
+access, getxattr, fstat, readlink, readv, stat.
+
+### inode-write
+
+Operations that modify an inode's data or metadata.
+
+chmod, chown, truncate, writev, utimens.
+
+### dir-read
+
+Operations that read a directory's contents or metadata.
+
+readdir, getdents, checksum.
+
+### dir-write
+
+Operations that modify a directory's contents or metadata.
+
+create, link, mkdir, mknod, rename, rmdir, symlink, unlink.
+
+Some of these make a subgroup in that they modify *two* different entries:
+link, rename, symlink.
+
+### Others
+
+Other operations.
+
+flush, lookup, open, opendir, statfs.
+
+Algorithms
+----------
+
+Each of the four major groups has its own algorithm:
+
+### inode-read, dir-read
+
+1. Send a request to the first child that is up:
+ * if it fails:
+ * try the next available child
+ * if we have exhausted all children:
+ * return failure
+
+### inode-write
+
+ All operations are done in parallel unless specified otherwise.
+
+1. Send a ``GF_FILE_LK`` request on all children for a write lock on the
+ appropriate region
+ (for metadata operations: the entire file (0, 0); for writev:
+ (offset, offset+size of buffer))
+ * If a lock request fails on a child:
+ * unlock all children
+ * try to acquire a blocking lock (`F_SETLKW`) on each child, serially.
+ If this fails (due to `ENOTCONN` or `EINVAL`):
+ Consider this child as dead for rest of transaction.
+2. Mark all children as "pending" on all (alive) children (see below for
+meaning of "pending").
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+3. Perform operation on all (alive) children.
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+4. Clear the "pending" mark for all successful children on all nodes.
+5. Unlock region on all (alive) children.
+
+### dir-write
+
+ The algorithm for dir-write is same as above except instead of holding
+ `GF_FILE_LK` locks we hold a `GF_DIR_LK` lock on the name being operated upon.
+ In case of link-type calls, we hold locks on both the operand names.
+
+"pending"
+---------
+
+The "pending" number is like a journal entry. A pending entry is an
+array of 32-bit integers stored in network byte-order as the extended
+attribute of an inode (which can be a directory as well).
+
+There are three keys corresponding to three types of pending operations:
+
+### `AFR_METADATA_PENDING`
+
+There are some metadata operations pending on this inode (perms, ctime/mtime,
+xattr, etc.).
+
+### `AFR_DATA_PENDING`
+
+There is some data pending on this inode (writev).
+
+### `AFR_ENTRY_PENDING`
+
+There are some directory operations pending on this directory
+(create, unlink, etc.).
+
+Self heal
+---------
+
+* On lookup, gather extended attribute data:
+ * If entry is a regular file:
+ * If an entry is present on one child and not on others:
+ * create entry on others.
+ * If entries exist but have different metadata (perms, etc.):
+ * consider the entry with the highest `AFR_METADATA_PENDING` number as
+ definitive and replicate its attributes on children.
+ * If entry is a directory:
+ * Consider the entry with the highest `AFR_ENTRY_PENDING` number as
+ definitive and replicate its contents on all children.
+ * If any two entries have non-matching types (i.e., one is file and
+ other is directory):
+ * Announce to the user via log that a split-brain situation has been
+ detected, and do nothing.
+* On open, gather extended attribute data:
+ * Consider the file with the highest `AFR_DATA_PENDING` number as
+ the definitive one and replicate its contents on all other
+ children.
+
+During all self heal operations, appropriate locks must be held on all
+regions/entries being affected.
+
+Inode scaling
+-------------
+
+Inode scaling is necessary because if a situation arises where an inode number
+is returned for a directory (by lookup) which was previously the inode number
+of a file (as per FUSE's table), then FUSE gets horribly confused (consult a
+FUSE expert for more details).
+
+To avoid such a situation, we distribute the 64-bit inode space equally
+among all children of replicate.
+
+To illustrate:
+
+If c1, c2, c3 are children of replicate, they each get 1/3 of the available
+inode space:
+
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+Child: c1 c2 c3 c1 c2 c3 c1 c2 c3 c1 c2 ...
+Inode number: 1 2 3 4 5 6 7 8 9 10 11 ...
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+
+Thus, if lookup on c1 returns an inode number "2", it is scaled to "4"
+(which is the second inode number in c1's space).
+
+This way we ensure that there is never a collision of inode numbers from
+two different children.
+
+This reduction of inode space doesn't really reduce the usability of
+replicate since even if we assume replicate has 1024 children (which would be a
+highly unusual scenario), each child still has a 54-bit inode space:
+$2^{54} \sim 1.8 \times 10^{16}$, which is much larger than any real
+world requirement.
diff --git a/doc/hacker-guide/en-US/markdown/coding-standard.md b/doc/hacker-guide/en-US/markdown/coding-standard.md
new file mode 100644
index 000000000..178dc142a
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/coding-standard.md
@@ -0,0 +1,402 @@
+GlusterFS Coding Standards
+==========================
+
+Structure definitions should have a comment per member
+------------------------------------------------------
+
+Every member in a structure definition must have a comment about its
+purpose. The comment should be descriptive without being overly verbose.
+
+*Bad:*
+
+```
+gf_lock_t lock; /* lock */
+```
+
+*Good:*
+
+```
+DBTYPE access_mode; /* access mode for accessing
+ * the databases, can be
+ * DB_HASH, DB_BTREE
+ * (option access-mode <mode>)
+ */
+```
+
+Declare all variables at the beginning of the function
+------------------------------------------------------
+
+All local variables in a function must be declared immediately after the
+opening brace. This makes it easy to keep track of memory that needs to be freed
+during exit. It also helps debugging, since gdb cannot handle variables
+declared inside loops or other such blocks.
+
+Always initialize local variables
+---------------------------------
+
+Every local variable should be initialized to a sensible default value
+at the point of its declaration. All pointers should be initialized to NULL,
+and all integers should be zero or (if it makes sense) an error value.
+
+
+*Good:*
+
+```
+int ret = 0;
+char *databuf = NULL;
+int _fd = -1;
+```
+
+Initialization should always be done with a constant value
+----------------------------------------------------------
+
+Never use a non-constant expression as the initialization value for a variable.
+
+
+*Bad:*
+
+```
+pid_t pid = frame->root->pid;
+char *databuf = malloc (1024);
+```
+
+Validate all arguments to a function
+------------------------------------
+
+All pointer arguments to a function must be checked for `NULL`.
+A macro named `VALIDATE` (in `common-utils.h`)
+takes one argument, and if it is `NULL`, writes a log message and
+jumps to a label called `err` after setting op_ret and op_errno
+appropriately. It is recommended to use this template.
+
+
+*Good:*
+
+```
+VALIDATE(frame);
+VALIDATE(this);
+VALIDATE(inode);
+```
+
+Never rely on precedence of operators
+-------------------------------------
+
+Never write code that relies on the precedence of operators to execute
+correctly. Such code can be hard to read and someone else might not
+know the precedence of operators as accurately as you do.
+
+*Bad:*
+
+```
+if (op_ret == -1 && errno != ENOENT)
+```
+
+*Good:*
+
+```
+if ((op_ret == -1) && (errno != ENOENT))
+```
+
+Use exactly matching types
+--------------------------
+
+Use a variable of the exact type declared in the manual to hold the
+return value of a function. Do not use an "equivalent" type.
+
+
+*Bad:*
+
+```
+int len = strlen (path);
+```
+
+*Good:*
+
+```
+size_t len = strlen (path);
+```
+
+Never write code such as `foo->bar->baz`; check every pointer
+-------------------------------------------------------------
+
+Do not write code that blindly follows a chain of pointer
+references. Any pointer in the chain may be `NULL` and thus
+cause a crash. Verify that each pointer is non-null before following
+it.
+
+Check return value of all functions and system calls
+----------------------------------------------------
+
+The return value of all system calls and API functions must be checked
+for success or failure.
+
+*Bad:*
+
+```
+close (fd);
+```
+
+*Good:*
+
+```
+op_ret = close (_fd);
+if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "close on file %s failed (%s)", real_path,
+ strerror (errno));
+ op_errno = errno;
+ goto out;
+}
+```
+
+
+Gracefully handle failure of malloc
+-----------------------------------
+
+GlusterFS should never crash or exit due to lack of memory. If a
+memory allocation fails, the call should be unwound and an error
+returned to the user.
+
+*Use result args and reserve the return value to indicate success or failure:*
+
+The return value of every function must indicate success or failure (unless
+it is impossible for the function to fail --- e.g., boolean functions). If
+the function needs to return additional data, it must be returned using a
+result (pointer) argument.
+
+*Bad:*
+
+```
+int32_t dict_get_int32 (dict_t *this, char *key);
+```
+
+*Good:*
+
+```
+int dict_get_int32 (dict_t *this, char *key, int32_t *val);
+```
+
+Always use the `n` versions of string functions
+-----------------------------------------------
+
+Unless impossible, use the length-limited versions of the string functions.
+
+*Bad:*
+
+```
+strcpy (entry_path, real_path);
+```
+
+*Good:*
+
+```
+strncpy (entry_path, real_path, entry_path_len);
+```
+
+No dead or commented code
+-------------------------
+
+There must be no dead code (code to which control can never be passed) or
+commented out code in the codebase.
+
+Only one unwind and return per function
+---------------------------------------
+
+There must be only one exit out of a function. `UNWIND` and return
+should happen at only one point in the function.
+
+Function length or Keep functions small
+---------------------------------------
+
+We live in the UNIX-world where modules do one thing and do it well.
+This rule should apply to our functions also. If a function is very long, try splitting it
+into many little helper functions. The question is, in a coding
+spree, how do we know a function is long and unreadable. One rule of
+thumb given by Linus Torvalds is that, a function should be broken-up
+if you have 4 or more levels of indentation going on for more than 3-4
+lines.
+
+*Example for a helper function:*
+```
+static int
+same_owner (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return ((l1->client_pid == l2->client_pid) &&
+ (l1->transport == l2->transport));
+}
+```
+
+Defining functions as static
+----------------------------
+
+Define internal functions as static only if you're
+very sure that there will not be a crash(..of any kind..) emanating in
+that function. If there is even a remote possibility, perhaps due to
+pointer dereferencing, etc., declare the function as non-static. This
+ensures that when a crash does happen, the function name shows up
+in the back-trace generated by libc. However, doing so has potential
+for polluting the function namespace, so to avoid conflicts with other
+components in other parts, ensure that the function names are
+prepended with a prefix that identify the component to which it
+belongs. For eg. non-static functions in io-threads translator start
+with iot_.
+
+Ensure function calls wrap around after 80-columns
+--------------------------------------------------
+
+Place remaining arguments on the next line if needed.
+
+Functions arguments and function definition
+-------------------------------------------
+
+Place all the arguments of a function definition on the same line
+until the line goes beyond 80-cols. Arguments that extend beyond
+80-cols should be placed on the next line.
+
+Style issues
+------------
+
+### Brace placement
+
+Use K&R/Linux style of brace placement for blocks.
+
+*Good:*
+
+```
+int some_function (...)
+{
+ if (...) {
+ /* ... */
+ } else if (...) {
+ /* ... */
+ } else {
+ /* ... */
+ }
+
+ do {
+ /* ... */
+ } while (cond);
+}
+```
+
+### Indentation
+
+Use *eight* spaces for indenting blocks. Ensure that your
+file contains only spaces and not tab characters. You can do this
+in Emacs by selecting the entire file (`C-x h`) and
+running `M-x untabify`.
+
+To make Emacs indent lines automatically by eight spaces, add this
+line to your `.emacs`:
+
+```
+(add-hook 'c-mode-hook (lambda () (c-set-style "linux")))
+```
+
+### Comments
+
+Write a comment before every function describing its purpose (one-line),
+its arguments, and its return value. Mention whether it is an internal
+function or an exported function.
+
+Write a comment before every structure describing its purpose, and
+write comments about each of its members.
+
+Follow the style shown below for comments, since such comments
+can then be automatically extracted by doxygen to generate
+documentation.
+
+*Good:*
+
+```
+/**
+* hash_name -hash function for filenames
+* @par: parent inode number
+* @name: basename of inode
+* @mod: number of buckets in the hashtable
+*
+* @return: success: bucket number
+* failure: -1
+*
+* Not for external use.
+*/
+```
+
+### Indicating critical sections
+
+To clearly show regions of code which execute with locks held, use
+the following format:
+
+```
+pthread_mutex_lock (&mutex);
+{
+ /* code */
+}
+pthread_mutex_unlock (&mutex);
+```
+
+*A skeleton fop function:*
+
+This is the recommended template for any fop. In the beginning come
+the initializations. After that, the "success" control flow should be
+linear. Any error conditions should cause a `goto` to a single
+point, `out`. At that point, the code should detect the error
+that has occurred and do appropriate cleanup.
+
+```
+int32_t
+sample_fop (call_frame_t *frame, xlator_t *this, ...)
+{
+ char * var1 = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ DIR * dir = NULL;
+ struct posix_fd * pfd = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+
+ /* other validations */
+
+ dir = opendir (...);
+
+ if (dir == NULL) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s (%s)", loc->path,
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* another system call */
+ if (...) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory :(");
+ goto out;
+ }
+
+ /* ... */
+
+ out:
+ if (op_ret == -1) {
+
+ /* check for all the cleanup that needs to be
+ done */
+
+ if (dir) {
+ closedir (dir);
+ dir = NULL;
+ }
+
+ if (pfd) {
+ FREE (pfd->path);
+ FREE (pfd);
+ pfd = NULL;
+ }
+ }
+
+ STACK_UNWIND (frame, op_ret, op_errno, fd);
+ return 0;
+}
+```
diff --git a/doc/hacker-guide/en-US/markdown/posix.md b/doc/hacker-guide/en-US/markdown/posix.md
new file mode 100644
index 000000000..84c813e55
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/posix.md
@@ -0,0 +1,59 @@
+storage/posix translator
+========================
+
+Notes
+-----
+
+### `SET_FS_ID`
+
+This is so that all filesystem checks are done with the user's
+uid/gid and not GlusterFS's uid/gid.
+
+### `MAKE_REAL_PATH`
+
+This macro concatenates the base directory of the posix volume
+('option directory') with the given path.
+
+### `need_xattr` in lookup
+
+If this flag is passed, lookup returns a xattr dictionary that contains
+the file's create time, the file's contents, and the version number
+of the file.
+
+This is a hack to increase small file performance. If an application
+wants to read a small file, it can finish its job with just a lookup
+call instead of a lookup followed by read.
+
+### `getdents`/`setdents`
+
+These are used by unify to set and get directory entries.
+
+### `ALIGN_BUF`
+
+Macro to align an address to a page boundary (4K).
+
+### `priv->export_statfs`
+
+In some cases, two exported volumes may reside on the same
+partition on the server. Sending statvfs info for both
+the volumes will lead to erroneous df output at the client,
+since free space on the partition will be counted twice.
+
+In such cases, user can disable exporting statvfs info
+on one of the volumes by setting this option.
+
+### `xattrop`
+
+This fop is used by replicate to set version numbers on files.
+
+### `getxattr`/`setxattr` hack to read/write files
+
+A key, `GLUSTERFS_FILE_CONTENT_STRING`, is handled in a special way by
+`getxattr`/`setxattr`. A getxattr with the key will return the entire
+content of the file as the value. A `setxattr` with the key will write
+the value as the entire content of the file.
+
+### `posix_checksum`
+
+This calculates a simple XOR checksum on all entry names in a
+directory that is used by unify to compare directory contents.
diff --git a/doc/hacker-guide/en-US/markdown/translator-development.md b/doc/hacker-guide/en-US/markdown/translator-development.md
new file mode 100644
index 000000000..77d1b606a
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/translator-development.md
@@ -0,0 +1,666 @@
+Translator development
+======================
+
+Setting the Stage
+-----------------
+
+This is the first post in a series that will explain some of the details of
+writing a GlusterFS translator, using some actual code to illustrate.
+
+Before we begin, a word about environments. GlusterFS is over 300K lines of
+code spread across a few hundred files. That's no Linux kernel or anything, but
+ you're still going to be navigating through a lot of code in every
+code-editing session, so some kind of cross-referencing is *essential*. I use
+cscope with the vim bindings, and if I couldn't do Ctrl+G and such to jump
+between definitions all the time my productivity would be cut in half. You may
+prefer different tools, but as I go through these examples you'll need
+something functionally similar to follow on. OK, on with the show.
+
+The first thing you need to know is that translators are not just bags of
+functions and variables. They need to have a very definite internal structure
+so that the translator-loading code can figure out where all the pieces are.
+The way it does this is to use dlsym to look for specific names within your
+shared-object file, as follows (from `xlator.c`):
+
+```
+if (!(xl->fops = dlsym (handle, "fops"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->cbks = dlsym (handle, "cbks"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->init = dlsym (handle, "init"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->fini = dlsym (handle, "fini"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
+ dlerror ());
+ goto out;
+}
+```
+
+In this example, `xl` is a pointer to the in-memory object for the translator
+we're loading. As you can see, it's looking up various symbols *by name* in the
+ shared object it just loaded, and storing pointers to those symbols. Some of
+them (e.g. `init`) are functions, while others (e.g. `fops`) are dispatch tables
+containing pointers to many functions. Together, these make up the translator's
+ public interface.
+
+Most of this glue or boilerplate can easily be found at the bottom of one of
+the source files that make up each translator. We're going to use the `rot-13`
+translator just for fun, so in this case you'd look in `rot-13.c` to see this:
+
+```
+struct xlator_fops fops = {
+ .readv = rot13_readv,
+ .writev = rot13_writev
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+{ .key = {"encrypt-write"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {NULL} },
+};
+```
+
+The `fops` table, defined in `xlator.h`, is one of the most important pieces.
+This table contains a pointer to each of the filesystem functions that your
+translator might implement -- `open`, `read`, `stat`, `chmod`, and so on. There
+are 82 such functions in all, but don't worry; any that you don't specify here
+will be seen as null and filled with defaults from `defaults.c` when your
+translator is loaded. In this particular example, since `rot-13` is an
+exceptionally simple translator, we only fill in two entries for `readv` and
+`writev`.
+
+There are actually two other tables, also required to have predefined names,
+that are also used to find translator functions: `cbks` (which is empty in this
+ snippet) and `dumpops` (which is missing entirely). The first of these specifies
+ entry points for when inodes are forgotten or file descriptors are released.
+In other words, they're destructors for objects in which your translator might
+ have an interest. Mostly you can ignore them, because the default behavior
+handles even the simpler cases of translator-specific inode/fd context
+automatically. However, if the context you attach is a complex structure
+requiring complex cleanup, you'll need to supply these functions. As for
+dumpops, that's just used if you want to provide functions to pretty-print
+various structures in logs. I've never used it myself, though I probably
+should. What's noteworthy here is that we don't even define dumpops. That's
+because all of the functions that might use these dispatch functions will check
+ for `xl->dumpops` being `NULL` before calling through it. This is in sharp
+contrast to the behavior for `fops` and `cbks`, which *must* be present. If
+they're not, translator loading will fail because these pointers are not
+checked every time and if they're `NULL` then we'll segfault. That's why we
+provide an empty definition for cbks; it's OK for the individual function
+pointers to be NULL, but not for the whole table to be absent.
+
+The last piece I'll cover today is options. As you can see, this is a table of
+translator-specific option names and some information about their types.
+GlusterFS actually provides a pretty rich set of types (`volume_option_type_t`
+in `options.h`) which includes paths, translator names, percentages, and times
+in addition to the obvious integers and strings. Also, the `volume_option_t`
+structure can include information about alternate names, min/max/default
+values, enumerated string values, and descriptions. We don't see any of these
+here, so let's take a quick look at some more complex examples from afr.c and
+then come back to `rot-13`.
+
+```
+{ .key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source.",
+ .value = { "diff", "full", "" }
+},
+{ .key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks per file for which "
+ "self-heal process would be applied simultaneously."
+},
+```
+
+When your translator is loaded, all of this information is used to parse the
+options actually provided in the volfile, and then the result is turned into a
+dictionary and stored as `xl->options`. This dictionary is then processed by
+your init function, which you can see being looked up in the first code
+fragment above. We're only going to look at a small part of the `rot-13`'s
+init for now.
+
+```
+priv->decrypt_read = 1;
+priv->encrypt_write = 1;
+
+data = dict_get (this->options, "encrypt-write");
+if (data) {
+ if (gf_string2boolean (data->data, &priv->encrypt_write)
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "encrypt-write takes only boolean options");
+ return -1;
+ }
+}
+```
+
+What we can see here is that we're setting some defaults in our priv structure,
+then looking to see if an `encrypt-write` option was actually provided. If so,
+we convert and store it. This is a pretty classic use of dict_get to fetch a
+field from a dictionary, and of using one of many conversion functions in
+`common-utils.c` to convert `data->data` into something we can use.
+
+So far we've covered the basics of how a translator gets loaded, how we find its
+various parts, and how we process its options. In my next Translator 101 post,
+we'll go a little deeper into other things that init and its companion fini
+might do, and how some other fields in our `xlator_t` structure (commonly
+referred to as this) are commonly used.
+
+`init`, `fini`, and private context
+-----------------------------------
+
+In the previous Translator 101 post, we looked at some of the dispatch tables
+and options processing in a translator. This time we're going to cover the rest
+ of the "shell" of a translator -- i.e. the other global parts not specific to
+handling a particular request.
+
+Let's start by looking at the relationship between a translator and its shared
+library. At a first approximation, this is the relationship between an object
+and a class in just about any object-oriented programming language. The class
+defines behaviors, but has to be instantiated as an object to have any kind of
+existence. In our case the object is an `xlator_t`. Several of these might be
+created within the same daemon, sharing all of the same code through init/fini
+and dispatch tables, but sharing *no data*. You could implement shared data (as
+ static variables in your shared libraries) but that's strongly discouraged.
+Every function in your shared library will get an `xlator_t` as an argument,
+and should use it. This lack of class-level data is one of the points where
+the analogy to common OOP systems starts to break down. Another place is the
+complete lack of inheritance. Translators inherit behavior (code) from exactly
+one shared library -- looked up and loaded using the `type` field in a volfile
+`volume ... end-volume` block -- and that's it -- not even single inheritance,
+no subclasses or superclasses, no mixins or prototypes, just the relationship
+between an object and its class. With that in mind, let's turn to the init
+function that we just barely touched on last time.
+
+```
+int32_t
+init (xlator_t *this)
+{
+ data_t *data = NULL;
+ rot_13_private_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log ("rot13", GF_LOG_ERROR,
+ "FATAL: rot13 should have exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (sizeof (rot_13_private_t), 1, 0);
+ if (!priv)
+ return -1;
+```
+
+At the very top, we see the function signature -- we get a pointer to the
+`xlator_t` object that we're initializing, and we return an `int32_t` status.
+As with most functions in the translator API, this should be zero to indicate
+success. In this case it's safe to return -1 for failure, but watch out: in
+dispatch-table functions, the return value means the status of the *function
+call* rather than the *request*. A request error should be reflected as a
+callback with a non-zero `op_ret` value, but the dispatch function itself
+should still return zero. In fact, the handling of a non-zero return from a
+dispatch function is not all that robust (we recently had a bug report in
+HekaFS related to this) so it's something you should probably avoid
+altogether. This only underscores the difference between dispatch functions
+and `init`/`fini` functions, where non-zero returns *are* expected and handled
+logically by aborting the translator setup. We can see that down at the
+bottom, where we return -1 to indicate that we couldn't allocate our
+private-data area (more about that later).
+
+The first thing this init function does is check that the translator is being
+set up in the right kind of environment. Translators are called by parents and
+in turn call children. Some translators are "initial" translators that inject
+requests into the system from elsewhere -- e.g. mount/fuse injecting requests
+from the kernel, protocol/server injecting requests from the network. Those
+translators don't need parents, but `rot-13` does and so we check for that.
+Similarly, some translators are "final" translators that (from the perspective
+of the current process) terminate requests instead of passing them on -- e.g.
+`protocol/client` passing them to another node, `storage/posix` passing them to
+a local filesystem. Other translators "multiplex" between multiple children --
+ passing each parent request on to one (`cluster/dht`), some
+(`cluster/stripe`), or all (`cluster/afr`) of those children. `rot-13` fits
+into none of those categories either, so it checks that it has *exactly one*
+child. It might be more convenient or robust if translator shared libraries
+had standard variables describing these requirements, to be checked in a
+consistent way by the translator-loading infrastructure itself instead of by
+each separate init function, but this is the way translators work today.
+
+The last thing we see in this fragment is allocating our private data area.
+This can literally be anything we want; the infrastructure just provides the
+priv pointer as a convenience but takes no responsibility for how it's used. In
+ this case we're using `GF_CALLOC` to allocate our own `rot_13_private_t`
+structure. This gets us all the benefits of GlusterFS's memory-leak detection
+infrastructure, but the way we're calling it is not quite ideal. For one thing,
+ the first two arguments -- from `calloc(3)` -- are kind of reversed. For
+another, notice how the last argument is zero. That can actually be an
+enumerated value, to tell the GlusterFS allocator *what* type we're
+allocating. This can be very useful information for memory profiling and leak
+detection, so it's recommended that you follow the example of any
+`xxx-mem-types.h` file elsewhere in the source tree instead of just passing
+zero here (even though that works).
+
+To finish our tour of standard initialization/termination, let's look at the
+end of `init` and the beginning of `fini`:
+
+```
+ this->private = priv;
+ gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
+ return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ rot_13_private_t *priv = this->private;
+
+ if (!priv)
+ return;
+ this->private = NULL;
+ GF_FREE (priv);
+```
+
+At the end of init we're just storing our private-data pointer in the `private`
+field of our `xlator_t`, then returning zero to indicate that initialization
+succeeded. As is usually the case, our fini is even simpler. All it really has
+to do is `GF_FREE` our private-data pointer, which we do in a slightly
+roundabout way here. Notice how we don't even have a return value here, since
+there's nothing obvious and useful that the infrastructure could do if `fini`
+failed.
+
+That's practically everything we need to know to get our translator through
+loading, initialization, options processing, and termination. If we had defined
+ no dispatch functions, we could actually configure a daemon to use our
+translator and it would work as a basic pass-through from its parent to a
+single child. In the next post I'll cover how to build the translator and
+configure a daemon to use it, so that we can actually step through it in a
+debugger and see how it all fits together before we actually start adding
+functionality.
+
+This Time For Real
+------------------
+
+In the first two parts of this series, we learned how to write a basic
+translator skeleton that can get through loading, initialization, and option
+processing. This time we'll cover how to build that translator, configure a
+volume to use it, and run the glusterfs daemon in debug mode.
+
+Unfortunately, there's not much direct support for writing new translators. You
+can check out a GlusterFS tree and splice in your own translator directory, but
+ that's a bit painful because you'll have to update multiple makefiles plus a
+bunch of autoconf garbage. As part of the HekaFS project, I basically reverse
+engineered the truly necessary parts of the translator-building process and
+then pestered one of the Fedora glusterfs package maintainers (thanks
+daMaestro!) to add a `glusterfs-devel` package with the required headers. Since
+ then the complexity level in the HekaFS tree has crept back up a bit, but I
+still remember the simple method and still consider it the easiest way to get
+started on a new translator. For the sake of those not using Fedora, I'm going
+to describe a method that doesn't depend on that header package. What it does
+depend on is a GlusterFS source tree, much as you might have cloned from GitHub
+ or the Gluster review site. This tree doesn't have to be fully built, but you
+do need to run `autogen.sh` and configure in it. Then you can take the
+following simple makefile and put it in a directory with your actual source.
+
+```
+# Change these to match your source code.
+TARGET = rot-13.so
+OBJECTS = rot-13.o
+
+# Change these to match your environment.
+GLFS_SRC = /srv/glusterfs
+GLFS_LIB = /usr/lib64
+HOST_OS = GF_LINUX_HOST_OS
+
+# You shouldn't need to change anything below here.
+
+CFLAGS = -fPIC -Wall -O0 -g \
+ -DHAVE_CONFIG_H -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
+ -D$(HOST_OS) -I$(GLFS_SRC) -I$(GLFS_SRC)/contrib/uuid \
+ -I$(GLFS_SRC)/libglusterfs/src
+LDFLAGS = -shared -nostartfiles -L$(GLFS_LIB) -lglusterfs \
+ -lpthread
+
+$(TARGET): $(OBJECTS)
+ $(CC) $(OBJECTS) $(LDFLAGS) -o $(TARGET)
+```
+
+Yes, it's still Linux-specific. Mea culpa. As you can see, we're sticking with
+the `rot-13` example, so you can just copy the files from
+`xlators/encryption/rot-13/src` in your GlusterFS tree to follow on. Type
+`make` and you should be rewarded with a nice little `.so` file.
+
+```
+xlator_example$ ls -l rot-13.so
+-rwxr-xr-x. 1 jeff jeff 40784 Nov 16 16:41 rot-13.so
+```
+
+Notice that we've built with optimization level zero and debugging symbols
+included, which would not typically be the case for a packaged version of
+GlusterFS. Let's put our version of `rot-13.so` into a slightly different file
+on our system, so that it doesn't stomp on the installed version (not that
+you'd ever want to use that anyway).
+
+```
+xlator_example# ls /usr/lib64/glusterfs/3git/xlator/encryption/
+crypt.so crypt.so.0 crypt.so.0.0.0 rot-13.so rot-13.so.0
+rot-13.so.0.0.0
+xlator_example# cp rot-13.so \
+ /usr/lib64/glusterfs/3git/xlator/encryption/my-rot-13.so
+```
+
+These paths represent the current Gluster filesystem layout, which is likely to
+be deprecated in favor of the Fedora layout; your paths may vary. At this point
+ we're ready to configure a volume using our new translator. To do that, I'm
+going to suggest something that's strongly discouraged except during
+development (the Gluster guys are going to hate me for this): write our own
+volfile. Here's just about the simplest volfile you'll ever see.
+
+```
+volume my-posix
+ type storage/posix
+ option directory /srv/export
+end-volume
+
+volume my-rot13
+ type encryption/my-rot-13
+ subvolumes my-posix
+end-volume
+```
+
+All we have here is a basic brick using `/srv/export` for its data, and then
+an instance of our translator layered on top -- no client or server is
+necessary for what we're doing, and the system will automatically push a
+mount/fuse translator on top if there's no server translator. To try this out,
+all we need is the following command (assuming the directories involved already
+ exist).
+
+```
+xlator_example$ glusterfs --debug -f my.vol /srv/import
+```
+
+You should be rewarded with a whole lot of log output, including the text of
+the volfile (this is very useful for debugging problems in the field). If you
+go to another window on the same machine, you can see that you have a new
+filesystem mounted.
+
+```
+~$ df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/srv/xlator_example/my.vol
+ 114506240 2706176 105983488 3% /srv/import
+```
+
+Just for fun, write something into a file in `/srv/import`, then look at the
+corresponding file in `/srv/export` to see it all `rot-13`'ed for you.
+
+```
+~$ echo hello > /srv/import/a_file
+~$ cat /srv/export/a_file
+uryyb
+```
+
+There you have it -- functionality you control, implemented easily, layered on
+top of local storage. Now you could start adding functionality -- real
+encryption, perhaps -- and inevitably having to debug it. You could do that the
+ old-school way, with `gf_log` (preferred) or even plain old `printf`, or you
+could run daemons under `gdb` instead. Alternatively, you could wait for the
+next Translator 101 post, where we'll be doing exactly that.
+
+Debugging a Translator
+----------------------
+
+Now that we've learned what a translator looks like and how to build one, it's
+time to run one and actually watch it work. The best way to do this is good
+old-fashioned `gdb`, as follows (using some of the examples from last time).
+
+```
+xlator_example# gdb glusterfs
+GNU gdb (GDB) Red Hat Enterprise Linux (7.2-50.el6)
+...
+(gdb) r --debug -f my.vol /srv/import
+Starting program: /usr/sbin/glusterfs --debug -f my.vol /srv/import
+...
+[2011-11-23 11:23:16.495516] I [fuse-bridge.c:2971:fuse_init]
+ 0-glusterfs-fuse: FUSE inited with protocol versions:
+ glusterfs 7.13 kernel 7.13
+```
+
+If you get to this point, your glusterfs client process is already running. You
+can go to another window to see the mountpoint, do file operations, etc.
+
+```
+~# df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/root/xlator_example/my.vol
+ 114506240 2643968 106045568 3% /srv/import
+~# ls /srv/import
+a_file
+~# cat /srv/import/a_file
+hello
+```
+
+Now let's interrupt the process and see where we are.
+
+```
+^C
+Program received signal SIGINT, Interrupt.
+0x0000003a0060b3dc in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+(gdb) info threads
+ 5 Thread 0x7fffeffff700 (LWP 27206) 0x0000003a002dd8c7
+ in readv ()
+ from /lib64/libc.so.6
+ 4 Thread 0x7ffff50e3700 (LWP 27205) 0x0000003a0060b75b
+ in pthread_cond_timedwait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 3 Thread 0x7ffff5f02700 (LWP 27204) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 2 Thread 0x7ffff6903700 (LWP 27203) 0x0000003a0060f245
+ in sigwait ()
+ from /lib64/libpthread.so.0
+* 1 Thread 0x7ffff7957700 (LWP 27196) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+```
+
+Like any non-toy server, this one has multiple threads. What are they all
+doing? Honestly, even I don't know. Thread 1 turns out to be in
+`event_dispatch_epoll`, which means it's the one handling all of our network
+I/O. Note that with the socket multi-threading patch this will change, with one
+thread in `socket_poller` per connection. Thread 2 is in `glusterfs_sigwaiter`
+which means signals will be isolated to that thread. Thread 3 is in
+`syncenv_task`, so it's a worker process for synchronous requests such as
+those used by the rebalance and repair code. Thread 4 is in
+`janitor_get_next_fd`, so it's waiting for a chance to close no-longer-needed
+file descriptors on the local filesystem. (I admit I had to look that one up,
+BTW.) Lastly, thread 5 is in `fuse_thread_proc`, so it's the one fetching
+requests from our FUSE interface. You'll often see many more threads than
+this, but it's a pretty good basic set. Now, let's set a breakpoint so we can
+actually watch a request.
+
+```
+(gdb) b rot13_writev
+Breakpoint 1 at 0x7ffff50e4f0b: file rot-13.c, line 119.
+(gdb) c
+Continuing.
+```
+
+At this point we go into our other window and do something that will involve a write.
+
+```
+~# echo goodbye > /srv/import/another_file
+(back to the first window)
+[Switching to Thread 0x7fffeffff700 (LWP 27206)]
+
+Breakpoint 1, rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1, offset=0,
+ iobref=0x7fffe8001070) at rot-13.c:119
+119 rot_13_private_t *priv = (rot_13_private_t *)this->private;
+```
+
+Remember how we built with debugging symbols enabled and no optimization? That
+will be pretty important for the next few steps. As you can see, we're in
+`rot13_writev`, with several parameters.
+
+* `frame` is our always-present frame pointer for this request. Also,
+ `frame->local` will point to any local data we created and attached to the
+ request ourselves.
+* `this` is a pointer to our instance of the `rot-13` translator. You can examine
+ it if you like to see the name, type, options, parent/children, inode table,
+ and other stuff associated with it.
+* `fd` is a pointer to a file-descriptor *object* (`fd_t`, not just a
+ file-descriptor index which is what most people use "fd" for). This in turn
+ points to an inode object (`inode_t`) and we can associate our own
+ `rot-13`-specific data with either of these.
+* `vector` and `count` together describe the data buffers for this write, which
+ we'll get to in a moment.
+* `offset` is the offset into the file at which we're writing.
+* `iobref` is a buffer-reference object, which is used to track the life cycle
+ of buffers containing read/write data. If you look closely, you'll notice that
+ `vector[0].iov_base` points to the same address as `iobref->iobrefs[0].ptr`, which
+ should give you some idea of the inter-relationships between vector and iobref.
+
+OK, now what about that `vector`? We can use it to examine the data being
+written, like this.
+
+```
+(gdb) p vector[0]
+$2 = {iov_base = 0x7ffff7936000, iov_len = 8}
+(gdb) x/s 0x7ffff7936000
+0x7ffff7936000: "goodbye\n"
+```
+
+It's not always safe to view this data as a string, because it might just as
+well be binary data, but since we're generating the write this time it's safe
+and convenient. With that knowledge, let's step through things a bit.
+
+```
+(gdb) s
+120 if (priv->encrypt_write)
+(gdb)
+121 rot13_iovec (vector, count);
+(gdb)
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb)
+58 rot13 (vector[i].iov_base, vector[i].iov_len);
+(gdb)
+rot13 (buf=0x7ffff7936000 "goodbye\n", len=8) at rot-13.c:45
+45 for (i = 0; i < len; i++) {
+(gdb)
+46 if (buf[i] >= 'a' && buf[i] <= 'z')
+(gdb)
+47 buf[i] = 'a' + ((buf[i] - 'a' + 13) % 26);
+```
+
+Here we've stepped into `rot13_iovec`, which iterates through our vector
+calling `rot13`, which in turn iterates through the characters in that chunk
+doing the `rot-13` operation if/as appropriate. This is pretty straightforward
+stuff, so let's skip to the next interesting bit.
+
+```
+(gdb) fin
+Run till exit from #0 rot13 (buf=0x7ffff7936000 "goodbye\n",
+ len=8) at rot-13.c:47
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb) fin
+Run till exit from #0 rot13_iovec (vector=0x7fffe8000cd8,
+ count=1) at rot-13.c:57
+rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1,
+ offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+123 STACK_WIND (frame,
+(gdb) b 129
+Breakpoint 2 at 0x7ffff50e4f35: file rot-13.c, line 129.
+(gdb) b rot13_writev_cbk
+Breakpoint 3 at 0x7ffff50e4db3: file rot-13.c, line 106.
+(gdb) c
+```
+
+So we've set breakpoints on both the callback and the statement following the
+`STACK_WIND`. Which one will we hit first?
+
+```
+Breakpoint 3, rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+106 STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+ prebuf, postbuf);
+(gdb) bt
+#0 rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+#1 0x00007ffff52f1b37 in posix_writev (frame=0x7ffff6e440d8,
+ this=<value optimized out>, fd=<value optimized out>,
+ vector=<value optimized out>, count=1,
+ offset=<value optimized out>, iobref=0x7fffe8001070)
+ at posix.c:2217
+#2 0x00007ffff50e513e in rot13_writev (frame=0x7ffff6e4402c,
+ this=0x638440, fd=0x7ffff409802c, vector=0x7fffe8000cd8,
+ count=1, offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+```
+
+Surprise! We're in `rot13_writev_cbk` now, called (indirectly) while we're
+still in `rot13_writev` before `STACK_WIND` returns (still at rot-13.c:123). If
+ you did any request cleanup here, then you need to be careful about what you
+do in the remainder of `rot13_writev` because data may have been freed etc.
+It's tempting to say you should just do the cleanup in `rot13_writev` after
+the `STACK_WIND`, but that's not valid because it's also possible that some
+other translator returned without calling `STACK_UNWIND` -- i.e. before
+`rot13_writev_cbk` is called, so then it would be the one getting null-pointer
+errors instead. To put it another way, the callback and the return from
+`STACK_WIND` can occur in either order or even simultaneously on different
+threads. Even if you were to use reference counts, you'd have to make sure to
+use locking or atomic operations to avoid races, and it's not worth it. Unless
+you *really* understand the possible flows of control and know what you're
+doing, it's better to do cleanup in the callback and nothing after
+`STACK_WIND`.
+
+At this point all that's left is a `STACK_UNWIND` and a return. The
+`STACK_UNWIND` invokes our parent's completion callback, and in this case our
+parent is FUSE so at that point the VFS layer is notified of the write being
+complete. Finally, we return through several levels of normal function calls
+until we come back to fuse_thread_proc, which waits for the next request.
+
+So that's it. For extra fun, you might want to repeat this exercise by stepping
+through some other call -- stat or setxattr might be good choices -- but you'll
+ have to use a translator that actually implements those calls to see much
+that's interesting. Then you'll pretty much know everything I knew when I
+started writing my first for-real translators, and probably even a bit more. I
+hope you've enjoyed this series, or at least found it useful, and if you have
+any suggestions for other topics I should cover please let me know (via
+comments or email, IRC or Twitter).
diff --git a/doc/hacker-guide/en-US/markdown/write-behind.md b/doc/hacker-guide/en-US/markdown/write-behind.md
new file mode 100644
index 000000000..e20682249
--- /dev/null
+++ b/doc/hacker-guide/en-US/markdown/write-behind.md
@@ -0,0 +1,56 @@
+performance/write-behind translator
+===================================
+
+Basic working
+--------------
+
+Write-behind is basically a translator that tells the application that its
+write requests are finished, even before they actually are.
+
+On a regular translator tree without write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed up to the client-protocol or storage translator.
+5. client-protocol, on receiving reply from server, starts `STACK_UNWIND()` towards the fuse-bridge.
+
+On a translator tree with write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed up to the write-behind translator.
+5. write-behind adds the write buffer to its internal queue and does a `STACK_UNWIND()` towards the fuse-bridge.
+
+The write call is now complete from the application's perspective. After
+`STACK_UNWIND()`ing towards the fuse-bridge, write-behind initiates a fresh
+`writev()` call to its child translator, whose replies will be consumed by
+write-behind itself. Write-behind _doesn't_ cache the write buffer, unless
+`option flush-behind on` is specified in the volume specification file.
+
+Windowing
+---------
+
+With respect to write-behind, each write-buffer has three flags: `stack_wound`, `write_behind` and `got_reply`.
+
+* `stack_wound`: if set, indicates that write-behind has initiated `STACK_WIND()` towards child translator.
+* `write_behind`: if set, indicates that write-behind has done `STACK_UNWIND()` towards fuse-bridge.
+* `got_reply`: if set, indicates that write-behind has received a reply from the child translator for a `writev()` `STACK_WIND()`. A request will be destroyed by write-behind only if this flag is set.
+
+Currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
+
+The window size limits the aggregate size of currently pending write requests. Once
+the pending requests' size has reached the window size, write-behind blocks
+`writev()` calls from fuse-bridge. Blocking is only from the application's
+perspective. Write-behind does `STACK_WIND()` to the child translator
+straight away, but holds back the `STACK_UNWIND()` towards fuse-bridge.
+`STACK_UNWIND()` is done only once write-behind gets enough replies to
+accommodate the currently blocked request.
+
+Flush behind
+------------
+
+If `option flush-behind on` is specified in volume specification file, then
+write-behind sends aggregate write requests to child translator, instead of
+regular per request `STACK_WIND()`s.
diff --git a/doc/user-guide/legacy/Makefile.am b/doc/legacy/Makefile.am
index b2caabaa2..b2caabaa2 100644
--- a/doc/user-guide/legacy/Makefile.am
+++ b/doc/legacy/Makefile.am
diff --git a/doc/user-guide/legacy/advanced-stripe.odg b/doc/legacy/advanced-stripe.odg
index 7686d7091..7686d7091 100644
--- a/doc/user-guide/legacy/advanced-stripe.odg
+++ b/doc/legacy/advanced-stripe.odg
Binary files differ
diff --git a/doc/user-guide/legacy/advanced-stripe.pdf b/doc/legacy/advanced-stripe.pdf
index ec8b03dcf..ec8b03dcf 100644
--- a/doc/user-guide/legacy/advanced-stripe.pdf
+++ b/doc/legacy/advanced-stripe.pdf
Binary files differ
diff --git a/doc/user-guide/legacy/colonO-icon.jpg b/doc/legacy/colonO-icon.jpg
index 3e66f7a27..3e66f7a27 100644
--- a/doc/user-guide/legacy/colonO-icon.jpg
+++ b/doc/legacy/colonO-icon.jpg
Binary files differ
diff --git a/doc/user-guide/legacy/fdl.texi b/doc/legacy/fdl.texi
index e33c687cd..e33c687cd 100644
--- a/doc/user-guide/legacy/fdl.texi
+++ b/doc/legacy/fdl.texi
diff --git a/doc/user-guide/legacy/fuse.odg b/doc/legacy/fuse.odg
index 61bd103c7..61bd103c7 100644
--- a/doc/user-guide/legacy/fuse.odg
+++ b/doc/legacy/fuse.odg
Binary files differ
diff --git a/doc/user-guide/legacy/fuse.pdf b/doc/legacy/fuse.pdf
index a7d13faff..a7d13faff 100644
--- a/doc/user-guide/legacy/fuse.pdf
+++ b/doc/legacy/fuse.pdf
Binary files differ
diff --git a/doc/user-guide/legacy/ha.odg b/doc/legacy/ha.odg
index e4b8b72d0..e4b8b72d0 100644
--- a/doc/user-guide/legacy/ha.odg
+++ b/doc/legacy/ha.odg
Binary files differ
diff --git a/doc/user-guide/legacy/ha.pdf b/doc/legacy/ha.pdf
index e372c0ab0..e372c0ab0 100644
--- a/doc/user-guide/legacy/ha.pdf
+++ b/doc/legacy/ha.pdf
Binary files differ
diff --git a/doc/legacy/hacker-guide/adding-fops.txt b/doc/legacy/hacker-guide/adding-fops.txt
deleted file mode 100644
index e70dbbdc8..000000000
--- a/doc/legacy/hacker-guide/adding-fops.txt
+++ /dev/null
@@ -1,33 +0,0 @@
- HOW TO ADD A NEW FOP TO GlusterFS
- =================================
-
-Steps to be followed when adding a new FOP to GlusterFS:
-
-1. Edit glusterfs.h and add a GF_FOP_* constant.
-
-2. Edit xlator.[ch] and:
- 2a. add the new prototype for fop and callback.
- 2b. edit xlator_fops structure.
-
-3. Edit xlator.c and add to fill_defaults.
-
-4. Edit protocol.h and add struct necessary for the new FOP.
-
-5. Edit defaults.[ch] and provide default implementation.
-
-6. Edit call-stub.[ch] and provide stub implementation.
-
-7. Edit common-utils.c and add to gf_global_variable_init().
-
-8. Edit client-protocol and add your FOP.
-
-9. Edit server-protocol and add your FOP.
-
-10. Implement your FOP in any translator for which the default implementation
- is not sufficient.
-
-==========================================
-Last updated: Mon Oct 27 21:35:49 IST 2008
-
-Author: Vikas Gorur <vikas@gluster.com>
-==========================================
diff --git a/doc/legacy/hacker-guide/bdb.txt b/doc/legacy/hacker-guide/bdb.txt
deleted file mode 100644
index 1a80be813..000000000
--- a/doc/legacy/hacker-guide/bdb.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-
-* How does file translates to key/value pair?
----------------------------------------------
-
- in bdb a file is identified by key (obtained by taking basename() of the path of
-the file) and file contents are stored as value corresponding to the key in database
-file (defaults to glusterfs_storage.db under dirname() directory).
-
-* symlinks, directories
------------------------
-
- symlinks and directories are stored as is.
-
-* db (database) files
----------------------
-
- every directory, including root directory, contains a database file called
-glusterfs_storage.db. all the regular files contained in the directory are stored
-as key/value pair inside the glusterfs_storage.db.
-
-* internal data cache
----------------------
-
- db does not provide a way to find out the size of the value corresponding to a key.
-so, bdb makes DB->get() call for key and takes the length of the value returned.
-since DB->get() also returns file contents for key, bdb maintains an internal cache and
-stores the file contents in the cache.
- every directory maintains a seperate cache.
-
-* inode number transformation
------------------------------
-
- bdb allocates a inode number to each file and directory on its own. bdb maintains a
-global counter and increments it after allocating inode number for each file
-(regular, symlink or directory). NOTE: bdb does not guarantee persistent inode numbers.
-
-* checkpoint thread
--------------------
-
- bdb creates a checkpoint thread at the time of init(). checkpoint thread does a
-periodic checkpoint on the DB_ENV. checkpoint is the mechanism, provided by db, to
-forcefully commit the logged transactions to the storage.
-
-NOTES ABOUT FOPS:
------------------
-
-lookup() -
- 1> do lstat() on the path, if lstat fails, we assume that the file being looked up
- is either a regular file or doesn't exist.
- 2> lookup in the DB of parent directory for key corresponding to path. if key exists,
- return key, with.
- NOTE: 'struct stat' stat()ed from DB file is used as a container for 'struct stat'
- of the regular file. st_ino, st_size, st_blocks are updated with file's values.
-
-readv() -
- 1> do a lookup in bctx cache. if successful, return the requested data from cache.
- 2> if cache missed, do a DB->get() the entire file content and insert to cache.
-
-writev():
- 1> flush any cached content of this file.
- 2> do a DB->put(), with DB_DBT_PARTIAL flag.
- NOTE: DB_DBT_PARTIAL is used to do partial update of a value in DB.
-
-readdir():
- 1> regular readdir() in a loop, and vomit all DB_ENV log files and DB files that
- we encounter.
- 2> if the readdir() buffer still has space, open a DB cursor and do a sequential
- DBC->get() to fill the reaadir buffer.
-
-
diff --git a/doc/legacy/hacker-guide/lock-ahead.txt b/doc/legacy/hacker-guide/lock-ahead.txt
deleted file mode 100644
index 70aa452d3..000000000
--- a/doc/legacy/hacker-guide/lock-ahead.txt
+++ /dev/null
@@ -1,80 +0,0 @@
- Lock-ahead translator
- ---------------------
-
-The objective of the lock-ahead translator is to speculatively
-hold locks (inodelk and entrylk) on the universal set (0 - infinity
-in case of inodelk and all basenames in case of entrylk) even
-when a lock is requested only on a subset, in anticipation that
-further locks will be requested within the same universal set.
-
-So, for example, when cluster/replicate locks a region before
-writing to it, lock-ahead would instead lock the entire file.
-On further writes, lock-ahead can immediately return success for
-the lock requests, since the entire file has been previously locked.
-
-To avoid starvation of other clients/mountpoints, we employ a
-notify mechanism, described below.
-
-typedef struct {
- struct list_head subset_locks;
-} la_universal_lock_t;
-
-Universal lock structure is stored in the inode context.
-
-typedef struct {
- enum {LOCK_AHEAD_ENTRYLK, LOCK_AHEAD_FENTRYLK,
- LOCK_AHEAD_INODELK, LOCK_AHEAD_FINODELK};
-
- union {
- fd_t *fd;
- loc_t loc;
- };
-
- off_t l_start;
- off_t l_len;
-
- const char *basename;
-
- struct list_head universal_lock;
-} la_subset_lock_t;
-
-
-fops implemented:
-
-* inodelk/finodelk/entrylk/fentrylk:
-
-lock:
- if universal lock held:
- add subset to it (save loc_t or fd) and return success
- else:
- send lock-notify fop
- hold universal lock and return
- (set inode context, add subset to it, save loc_t or fd)
-
- if this fails:
- forward the lock request
-
-unlock:
- if subset exists in universal lock:
- delete subset lock from list
- else:
- forward it
-
-* release:
- hold subset locks (each subset lock using the saved loc_t or fd)
- and release universal lock
-
-* lock-notify (on unwind) (new fop)
- hold subset locks and release universal lock
-
-
-lock-notify in locks translator:
-
-if a subset lock in entrylk/inodelk cannot be satisfied
-because of a universal lock held by someone else:
- unwind the lock-notify fop
-
-==============================================
-$ Last updated: Tue Feb 17 11:31:18 IST 2009 $
-$ Author: Vikas Gorur <vikas@gluster.com> $
-==============================================
diff --git a/doc/legacy/hacker-guide/posix.txt b/doc/legacy/hacker-guide/posix.txt
deleted file mode 100644
index 7958af2ea..000000000
--- a/doc/legacy/hacker-guide/posix.txt
+++ /dev/null
@@ -1,59 +0,0 @@
----------------
-* storage/posix
----------------
-
-- SET_FS_ID
-
- This is so that all filesystem checks are done with the user's
- uid/gid and not GlusterFS's uid/gid.
-
-- MAKE_REAL_PATH
-
- This macro concatenates the base directory of the posix volume
- ('option directory') with the given path.
-
-- need_xattr in lookup
-
- If this flag is passed, lookup returns a xattr dictionary that contains
- the file's create time, the file's contents, and the version number
- of the file.
-
- This is a hack to increase small file performance. If an application
- wants to read a small file, it can finish its job with just a lookup
- call instead of a lookup followed by read.
-
-- getdents/setdents
-
- These are used by unify to set and get directory entries.
-
-- ALIGN_BUF
-
- Macro to align an address to a page boundary (4K).
-
-- priv->export_statfs
-
- In some cases, two exported volumes may reside on the same
- partition on the server. Sending statvfs info for both
- the volumes will lead to erroneous df output at the client,
- since free space on the partition will be counted twice.
-
- In such cases, user can disable exporting statvfs info
- on one of the volumes by setting this option.
-
-- xattrop
-
- This fop is used by replicate to set version numbers on files.
-
-- getxattr/setxattr hack to read/write files
-
- A key, GLUSTERFS_FILE_CONTENT_STRING, is handled in a special way by
- getxattr/setxattr. A getxattr with the key will return the entire
- content of the file as the value. A setxattr with the key will write
- the value as the entire content of the file.
-
-- posix_checksum
-
- This calculates a simple XOR checksum on all entry names in a
- directory that is used by unify to compare directory contents.
-
-
diff --git a/doc/legacy/hacker-guide/write-behind.txt b/doc/legacy/hacker-guide/write-behind.txt
deleted file mode 100644
index 50b7d2a1d..000000000
--- a/doc/legacy/hacker-guide/write-behind.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-basic working
---------------
-
- write behind is basically a translator to lie to the application that the write-requests are finished, even before it is actually finished.
-
- on a regular translator tree without write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto client-protocol or storage translator.
- 5. client-protocol, on receiving reply from server, starts STACK_UNWIND() towards the fuse-bridge.
-
- on a translator tree with write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto write-behind translator.
- 5. write-behind adds the write buffer to its internal queue and does a STACK_UNWIND() towards the fuse-bridge.
-
- write call is completed in application's percepective. after STACK_UNWIND()ing towards the fuse-bridge, write-behind initiates a fresh writev() call to its child translator, whose replies will be consumed by write-behind itself. write-behind _doesn't_ cache the write buffer, unless 'option flush-behind on' is specified in volume specification file.
-
-windowing
----------
-
- write respect to write-behind, each write-buffer has three flags: 'stack_wound', 'write_behind' and 'got_reply'.
-
- stack_wound: if set, indicates that write-behind has initiated STACK_WIND() towards child translator.
-
- write_behind: if set, indicates that write-behind has done STACK_UNWIND() towards fuse-bridge.
-
- got_reply: if set, indicates that write-behind has received reply from child translator for a writev() STACK_WIND(). a request will be destroyed by write-behind only if this flag is set.
-
- currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
-
- window size limits the aggregate size of currently pending write requests. once the pending requests' size has reached the window size, write-behind blocks writev() calls from fuse-bridge.
- blocking is only from application's perspective. write-behind does STACK_WIND() to child translator straight-away, but hold behind the STACK_UNWIND() towards fuse-bridge. STACK_UNWIND() is done only once write-behind gets enough replies to accomodate for currently blocked request.
-
-flush behind
-------------
-
- if 'option flush-behind on' is specified in volume specification file, then write-behind sends aggregate write requests to child translator, instead of regular per request STACK_WIND()s.
-
-
diff --git a/doc/user-guide/legacy/stripe.odg b/doc/legacy/stripe.odg
index 79441bf14..79441bf14 100644
--- a/doc/user-guide/legacy/stripe.odg
+++ b/doc/legacy/stripe.odg
Binary files differ
diff --git a/doc/user-guide/legacy/stripe.pdf b/doc/legacy/stripe.pdf
index b94446feb..b94446feb 100644
--- a/doc/user-guide/legacy/stripe.pdf
+++ b/doc/legacy/stripe.pdf
Binary files differ
diff --git a/doc/user-guide/legacy/unify.odg b/doc/legacy/unify.odg
index ccaa9bf16..ccaa9bf16 100644
--- a/doc/user-guide/legacy/unify.odg
+++ b/doc/legacy/unify.odg
Binary files differ
diff --git a/doc/user-guide/legacy/unify.pdf b/doc/legacy/unify.pdf
index c22027f66..c22027f66 100644
--- a/doc/user-guide/legacy/unify.pdf
+++ b/doc/legacy/unify.pdf
Binary files differ
diff --git a/doc/user-guide/legacy/user-guide.info b/doc/legacy/user-guide.info
index 6a49d078d..2bbadb351 100644
--- a/doc/user-guide/legacy/user-guide.info
+++ b/doc/legacy/user-guide.info
@@ -397,8 +397,8 @@ command-line options accepted by it.
`--volfile-server-port=<port-number>'
Listening port number of volfile server.
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
+`--volfile-server-transport=[tcp|ib-verbs]'
+ Transport type to get volfile from server. [default: `tcp']
`--xlator-options=<volume-name.option=value>'
Add/override a translator option for a volume with specified value.
@@ -467,8 +467,8 @@ filesystem to appear. Example:
`--volfile-server-port=<port-number>'
Listening port number of volfile server.
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
+`--volfile-server-transport=[tcp|ib-verbs]'
+ Transport type to get volfile from server. [default: `tcp']
`--xlator-options=<volume-name.option=value>'
Add/override a translator option for a volume with specified value.
diff --git a/doc/user-guide/legacy/user-guide.pdf b/doc/legacy/user-guide.pdf
index ed7bd2a99..ed7bd2a99 100644
--- a/doc/user-guide/legacy/user-guide.pdf
+++ b/doc/legacy/user-guide.pdf
Binary files differ
diff --git a/doc/user-guide/legacy/user-guide.texi b/doc/legacy/user-guide.texi
index 2d51da022..8e429853f 100644
--- a/doc/user-guide/legacy/user-guide.texi
+++ b/doc/legacy/user-guide.texi
@@ -416,8 +416,8 @@ Advanced Options
@item --volfile-server-port=<port-number>
Listening port number of volfile server.
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
+@item --volfile-server-transport=[tcp|ib-verbs]
+ Transport type to get volfile from server. [default: @command{tcp}]
@item --xlator-options=<volume-name.option=value>
Add/override a translator option for a volume with specified value.
@@ -494,8 +494,8 @@ Advanced Options
@item --volfile-server-port=<port-number>
Listening port number of volfile server.
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
+@item --volfile-server-transport=[tcp|ib-verbs]
+ Transport type to get volfile from server. [default: @command{tcp}]
@item --xlator-options=<volume-name.option=value>
Add/override a translator option for a volume with specified value.
diff --git a/doc/user-guide/legacy/xlator.odg b/doc/legacy/xlator.odg
index 179a65f6e..179a65f6e 100644
--- a/doc/user-guide/legacy/xlator.odg
+++ b/doc/legacy/xlator.odg
Binary files differ
diff --git a/doc/user-guide/legacy/xlator.pdf b/doc/legacy/xlator.pdf
index a07e14d67..a07e14d67 100644
--- a/doc/user-guide/legacy/xlator.pdf
+++ b/doc/legacy/xlator.pdf
Binary files differ
diff --git a/doc/logging.txt b/doc/logging.txt
new file mode 100644
index 000000000..b4ee45996
--- /dev/null
+++ b/doc/logging.txt
@@ -0,0 +1,66 @@
+
+The new logging framework in glusterfs is targeted at end users such as
+customers, community members, testers etc. It aims to provide clear,
+understandable logs, called user logs, whereas the current logs are
+considered developer logs. The new framework brings the following
+features:
+
+* Each message is logged with proper well defined error code and each
+ error code has well known error message.
+* A log book defines the error codes and error messages. It helps to
+ keep track of possible causes and remedies
+* Logs are sent to syslog. The syslog application can be configured to
+ pass them to a centralized logging system
+* It makes it possible to
+ - Remove repeated log messages
+ - Send alerts to users on certain events
+ - Run a program on events
+ - Call home service on events
+
+
+Log book:
+=========
+A log book is a JSON formatted file, error-codes.json, located in the top
+level of the glusterfs source repository. At compile time, gen-headers.py
+generates libglusterfs/src/gf-error-codes.h using the log book and the
+gf-error-codes.h.template file. libglusterfs/src/gf-error-codes.h
+consists of header definitions and helper functions to get the message by
+code for a given locale. Currently it has the _gf_get_message() function,
+which returns the message for locale 'en'.
+
+A new entry is added to the log book like this:
+
+{
+ "IO_ERROR": {"code": 2233,
+ "message": {"en": "I/O error occurred"}},
+ "SETUP_ERROR": {"code": 2240,
+ "message": {"en": "Setup error"}},
+}
+
+
+Logging:
+========
+The framework provides two functions
+
+void gf_openlog (const char *ident, int option, int facility);
+void gf_syslog (int error_code, int facility_priority, char *format, ...);
+
+Consumers need to call gf_openlog() prior to gf_syslog(), in the same way
+as the traditional syslog function calls. error_code is mandatory when using
+gf_syslog(). For example,
+
+gf_openlog (NULL, -1, -1);
+gf_syslog (GF_ERR_DEV, LOG_ERR, "error reading configuration file");
+
+The logs are sent in CEE format (http://cee.mitre.org/) to syslog.
+It is targeted at the rsyslog syslog server.
+
+This log framework is enabled at compile time by default. It can be
+disabled by passing '--disable-syslog' to ./configure or
+'--without syslog' to rpmbuild.
+
+Even though it is enabled at compile time, the
+/etc/glusterfs/logger.conf file must exist before starting
+gluster services for it to take effect.
+
+Currently all gluster logs are sent with error code GF_ERR_DEV.
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 10e1d59f0..01b7f7554 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -1,19 +1,10 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.\"
@@ -57,7 +48,7 @@ Mount the filesystem read-only
Volume key or name of the volume file to be fetched from server
.TP
\fBtransport=\fRTRANSPORT-TYPE
-Transport type to get volume file from server [default: socket]
+Transport type to get volume file from server [default: tcp]
.TP
\fBvolume\-name=\fRVOLUME-NAME
Volume name to be used for MOUNT-POINT [default: top most volume in
diff --git a/doc/qa/legacy/qa-client.vol b/doc/qa/legacy/qa-client.vol
deleted file mode 100644
index bcf242347..000000000
--- a/doc/qa/legacy/qa-client.vol
+++ /dev/null
@@ -1,170 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# 1st client
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
-# option transport.ib-verbs.work-request-send-size 131072
-# option transport.ib-verbs.work-request-send-count 64
-# option transport.ib-verbs.work-request-recv-size 131072
-# option transport.ib-verbs.work-request-recv-count 64
- option remote-host 127.0.0.1
- option remote-subvolume ra1
-end-volume
-
-# 2nd client
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra2
-end-volume
-
-# 3rd client
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra3
-end-volume
-
-# 4th client
-volume client4
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra4
-end-volume
-
-# 5th client
-volume client5
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra5
-end-volume
-
-# 6th client
-volume client6
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra6
-end-volume
-
-# 7th client
-volume client7
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra7
-end-volume
-
-# 8th client
-volume client8
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra8
-end-volume
-
-# 1st Stripe (client1 client2)
-volume stripe1
- type cluster/stripe
- subvolumes client1 client2
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 2st Stripe (client3 client4)
-volume stripe2
- type cluster/stripe
- subvolumes client3 client4
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 3st Stripe (client5 client6)
-volume stripe3
- type cluster/stripe
- subvolumes client5 client6
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 4st Stripe (client7 client8)
-volume stripe4
- type cluster/stripe
- subvolumes client7 client8
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-
-# 1st replicate
-volume replicate1
- type cluster/replicate
- subvolumes stripe1 stripe2
-end-volume
-
-# 2nd replicate
-volume replicate2
- type cluster/replicate
- subvolumes stripe3 stripe4
-end-volume
-
-volume ns
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option remote-subvolume brick-ns
-end-volume
-
-# Unify
-volume unify0
- type cluster/unify
- subvolumes replicate1 replicate2
-# subvolumes stripe1 stripe3
- option namespace ns
- option scheduler rr # random # alu # nufa
- option rr.limits.min-free-disk 1GB
-# option alu.order x
-# option alu.x.entry-threshold
-# option alu.x.exit-threshold
-end-volume
-
-
-# ==== Performance Translators ====
-# The default options for performance translators should be the best for 90+% of the cases
-volume iot
- type performance/io-threads
- subvolumes unify0
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes iot
-end-volume
-
-volume ioc
- type performance/io-cache
- subvolumes wb
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes ioc
-end-volume
diff --git a/doc/qa/legacy/qa-high-avail-client.vol b/doc/qa/legacy/qa-high-avail-client.vol
deleted file mode 100644
index 69cb8dd30..000000000
--- a/doc/qa/legacy/qa-high-avail-client.vol
+++ /dev/null
@@ -1,17 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost
- option transport.socket.remote-port 7001
- option remote-subvolume server1-iot
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes client
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes ra
-end-volume
diff --git a/doc/qa/legacy/qa-high-avail-server.vol b/doc/qa/legacy/qa-high-avail-server.vol
deleted file mode 100644
index 784e8d208..000000000
--- a/doc/qa/legacy/qa-high-avail-server.vol
+++ /dev/null
@@ -1,344 +0,0 @@
-
-# -- server 1 --
-volume server1-posix1
- type storage/posix
- option directory /tmp/ha-export1/
-end-volume
-
-volume server1-ns1
- type storage/posix
- option directory /tmp/ha-export-ns1/
-end-volume
-
-volume server1-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server1-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server1-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server1-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server1-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-posix1
-end-volume
-
-
-volume server1-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client2
-end-volume
-
-volume server1-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client3
-end-volume
-
-volume server1-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns1
-end-volume
-
-volume server1-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns2
-end-volume
-
-volume server1-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns3
-end-volume
-
-volume server1-ns-replicate
- type cluster/replicate
- subvolumes server1-ns-io1 server1-ns-io2 server1-ns-io3
-end-volume
-
-volume server1-storage-replicate
- type cluster/replicate
- subvolumes server1-io1 server1-io2 server1-io3
-end-volume
-
-volume server1-unify
- type cluster/unify
- #option self-heal off
- subvolumes server1-storage-replicate
- option namespace server1-ns-replicate
- option scheduler rr
-end-volume
-
-volume server1-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server1-unify
-end-volume
-
-volume server1
- type protocol/server
- option transport-type tcp
- subvolumes server1-iot
- option transport.socket.listen-port 7001
- option auth.addr.server1-posix1.allow *
- option auth.addr.server1-ns1.allow *
- option auth.addr.server1-iot.allow *
-end-volume
-
-
-# == Server2 ==
-volume server2-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server2-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server2-posix2
- type storage/posix
- option directory /tmp/ha-export2/
-end-volume
-
-volume server2-ns2
- type storage/posix
- option directory /tmp/ha-export-ns2/
-end-volume
-
-volume server2-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server2-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server2-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client1
-end-volume
-
-
-volume server2-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-posix2
-end-volume
-
-volume server2-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client3
-end-volume
-
-volume server2-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns1
-end-volume
-
-volume server2-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns2
-end-volume
-
-volume server2-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns3
-end-volume
-
-volume server2-ns-replicate
- type cluster/replicate
- subvolumes server2-ns-io1 server2-ns-io2 server2-ns-io3
-end-volume
-
-volume server2-storage-replicate
- type cluster/replicate
- subvolumes server2-io2 server2-io3 server2-io1
-end-volume
-
-volume server2-unify
- type cluster/unify
- option self-heal off
- subvolumes server2-storage-replicate
- option namespace server2-ns-replicate
- option scheduler rr
-end-volume
-
-volume server2-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server2-unify
-end-volume
-
-volume server2
- type protocol/server
- option transport-type tcp
- subvolumes server2-iot
- option transport.socket.listen-port 7002
- option auth.addr.server2-posix2.allow *
- option auth.addr.server2-ns2.allow *
- option auth.addr.server2-iot.allow *
-end-volume
-
-# == server 3 ==
-volume server3-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server3-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server3-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server3-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server3-posix3
- type storage/posix
- option directory /tmp/ha-export3/
-end-volume
-
-volume server3-ns3
- type storage/posix
- option directory /tmp/ha-export-ns3/
-end-volume
-
-volume server3-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client1
-end-volume
-
-
-volume server3-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client2
-end-volume
-
-volume server3-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-posix3
-end-volume
-
-volume server3-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns1
-end-volume
-
-volume server3-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns2
-end-volume
-
-volume server3-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns3
-end-volume
-
-volume server3-ns-replicate
- type cluster/replicate
- subvolumes server3-ns-io1 server3-ns-io2 server3-ns-io3
-end-volume
-
-volume server3-storage-replicate
- type cluster/replicate
- subvolumes server3-io3 server3-io2 server3-io1
-end-volume
-
-volume server3-unify
- type cluster/unify
- option self-heal off
- subvolumes server3-storage-replicate
- option namespace server3-ns-replicate
- option scheduler rr
-end-volume
-
-volume server3-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server3-unify
-end-volume
-
-volume server3
- type protocol/server
- option transport-type tcp
- subvolumes server3-iot
- option transport.socket.listen-port 7003
- option auth.addr.server3-posix3.allow *
- option auth.addr.server3-ns3.allow *
- option auth.addr.server3-iot.allow *
-end-volume
-
diff --git a/doc/qa/legacy/qa-server.vol b/doc/qa/legacy/qa-server.vol
deleted file mode 100644
index d948f701f..000000000
--- a/doc/qa/legacy/qa-server.vol
+++ /dev/null
@@ -1,284 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# Namespace posix
-volume brick-ns
- type storage/posix # POSIX FS translator
- option directory /tmp/export-ns # Export this directory
-end-volume
-
-# 1st server
-
-volume brick1
- type storage/posix # POSIX FS translator
- option directory /tmp/export1 # Export this directory
-end-volume
-
-# == Posix-Locks ==
- volume plocks1
- type features/posix-locks
-# option mandatory on
- subvolumes brick1
- end-volume
-
-volume iot1
- type performance/io-threads
- subvolumes plocks1 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb1
- type performance/write-behind
- subvolumes iot1
-# option <key> <value>
-end-volume
-
-volume ra1
- type performance/read-ahead
- subvolumes wb1
-# option <key> <value>
-end-volume
-
-volume brick2
- type storage/posix # POSIX FS translator
- option directory /tmp/export2 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash2
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick2
-# end-volume
-
-# == Posix-Locks ==
-volume plocks2
- type features/posix-locks
-# option <something> <something>
- subvolumes brick2
-end-volume
-
-volume iot2
- type performance/io-threads
- subvolumes plocks2 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb2
- type performance/write-behind
- subvolumes iot2
-# option <key> <value>
-end-volume
-
-volume ra2
- type performance/read-ahead
- subvolumes wb2
-# option <key> <value>
-end-volume
-
-volume brick3
- type storage/posix # POSIX FS translator
- option directory /tmp/export3 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash3
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick3
-# end-volume
-
-# == Posix-Locks ==
-volume plocks3
- type features/posix-locks
-# option <something> <something>
- subvolumes brick3
-end-volume
-
-volume iot3
- type performance/io-threads
- subvolumes plocks3 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb3
- type performance/write-behind
- subvolumes iot3
-# option <key> <value>
-end-volume
-
-volume ra3
- type performance/read-ahead
- subvolumes wb3
-# option <key> <value>
-end-volume
-
-volume brick4
- type storage/posix # POSIX FS translator
- option directory /tmp/export4 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks4
- type features/posix-locks
-# option <something> <something>
- subvolumes brick4
-end-volume
-
-volume iot4
- type performance/io-threads
- subvolumes plocks4 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb4
- type performance/write-behind
- subvolumes iot4
-# option <key> <value>
-end-volume
-
-volume ra4
- type performance/read-ahead
- subvolumes wb4
-# option <key> <value>
-end-volume
-
-volume brick5
- type storage/posix # POSIX FS translator
- option directory /tmp/export5 # Export this directory
-end-volume
-
-
-# == Posix-Locks ==
-volume plocks5
- type features/posix-locks
-# option <something> <something>
- subvolumes brick5
-end-volume
-
-volume iot5
- type performance/io-threads
- subvolumes plocks5 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb5
- type performance/write-behind
- subvolumes iot5
-# option <key> <value>
-end-volume
-
-volume ra5
- type performance/read-ahead
- subvolumes wb5
-# option <key> <value>
-end-volume
-
-volume brick6
- type storage/posix # POSIX FS translator
- option directory /tmp/export6 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks6
- type features/posix-locks
-# option <something> <something>
- subvolumes brick6
-end-volume
-
-volume iot6
- type performance/io-threads
- subvolumes plocks6 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb6
- type performance/write-behind
- subvolumes iot6
-# option <key> <value>
-end-volume
-
-volume ra6
- type performance/read-ahead
- subvolumes wb6
-# option <key> <value>
-end-volume
-
-volume brick7
- type storage/posix # POSIX FS translator
- option directory /tmp/export7 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks7
- type features/posix-locks
-# option <something> <something>
- subvolumes brick7
-end-volume
-
-volume iot7
- type performance/io-threads
- subvolumes plocks7 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb7
- type performance/write-behind
- subvolumes iot7
-# option <key> <value>
-end-volume
-
-volume ra7
- type performance/read-ahead
- subvolumes wb7
-# option <key> <value>
-end-volume
-
-volume brick8
- type storage/posix # POSIX FS translator
- option directory /tmp/export8 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks8
- type features/posix-locks
-# option <something> <something>
- subvolumes brick8
-end-volume
-
-volume iot8
- type performance/io-threads
- subvolumes plocks8 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb8
- type performance/write-behind
- subvolumes iot8
-# option <key> <value>
-end-volume
-
-volume ra8
- type performance/read-ahead
- subvolumes wb8
-# option <key> <value>
-end-volume
-
-volume server8
- type protocol/server
- subvolumes ra8 ra1 ra2 ra3 ra4 ra5 ra6 ra7 brick-ns
- option transport-type tcp # For TCP/IP transport
-# option transport-type ib-sdp # For Infiniband transport
-# option transport-type ib-verbs # For ib-verbs transport
- option client-volume-filename /examples/qa-client.vol
- option auth.addr.ra1.allow * # Allow access to "stat8" volume
- option auth.addr.ra2.allow * # Allow access to "stat8" volume
- option auth.addr.ra3.allow * # Allow access to "stat8" volume
- option auth.addr.ra4.allow * # Allow access to "stat8" volume
- option auth.addr.ra5.allow * # Allow access to "stat8" volume
- option auth.addr.ra6.allow * # Allow access to "stat8" volume
- option auth.addr.ra7.allow * # Allow access to "stat8" volume
- option auth.addr.ra8.allow * # Allow access to "stat8" volume
- option auth.addr.brick-ns.allow * # Allow access to "stat8" volume
-end-volume
-
diff --git a/doc/legacy/rpc-for-glusterfs.changes-done.txt b/doc/rpc-for-glusterfs.changes-done.txt
index 6bbbca788..6bbbca788 100644
--- a/doc/legacy/rpc-for-glusterfs.changes-done.txt
+++ b/doc/rpc-for-glusterfs.changes-done.txt
diff --git a/doc/split-brain.md b/doc/split-brain.md
new file mode 100644
index 000000000..b0d938e26
--- /dev/null
+++ b/doc/split-brain.md
@@ -0,0 +1,251 @@
+Steps to recover from File split-brain.
+======================================
+
+Quick Start:
+============
+1. Get the path of the file that is in split-brain:
+> It can be obtained either by
+> a) The command `gluster volume heal <VOLNAME> info split-brain`.
+> b) Identify the files for which file operations performed
+ from the client keep failing with Input/Output error.
+
+2. Close the applications that opened this file from the mount point.
+In case of VMs, they need to be powered-off.
+
+3. Decide on the correct copy:
+> This is done by observing the afr changelog extended attributes of the file on
+the bricks using the getfattr command; then identifying the type of split-brain
+(data split-brain, metadata split-brain, entry split-brain or split-brain due to
+gfid-mismatch); and finally determining which of the bricks contains the 'good copy'
+of the file.
+> `getfattr -d -m . -e hex <file-path-on-brick>`.
+It is also possible that one brick might contain the correct data while the
+other might contain the correct metadata.
+
+4. Reset the relevant extended attribute on the brick(s) that contains the
+'bad copy' of the file data/metadata using the setfattr command.
+> `setfattr -n <attribute-name> -v <attribute-value> <file-path-on-brick>`
+
+5. Trigger self-heal on the file by performing lookup from the client:
+> `ls -l <file-path-on-gluster-mount>`
+
+Detailed Instructions for steps 3 through 5:
+===========================================
+To understand how to resolve split-brain we need to know how to interpret the
+afr changelog extended attributes.
+
+Execute `getfattr -d -m . -e hex <file-path-on-brick>`
+
+* Example:
+[root@store3 ~]# getfattr -d -e hex -m. brick-a/file.txt
+\#file: brick-a/file.txt
+security.selinux=0x726f6f743a6f626a6563745f723a66696c655f743a733000
+trusted.afr.vol-client-2=0x000000000000000000000000
+trusted.afr.vol-client-3=0x000000000200000000000000
+trusted.gfid=0x307a5c9efddd4e7c96e94fd4bcdcbd1b
+
+The extended attributes with `trusted.afr.<volname>-client-<subvolume-index>`
+are used by afr to maintain the changelog of the file. The values of the
+`trusted.afr.<volname>-client-<subvolume-index>` are calculated by the glusterfs
+client (fuse or nfs-server) processes. When the glusterfs client modifies a file
+or directory, the client contacts each brick and updates the changelog extended
+attribute according to the response of the brick.
+
+'subvolume-index' is nothing but (brick number - 1) in
+`gluster volume info <volname>` output.
+
+* Example:
+[root@pranithk-laptop ~]# gluster volume info vol
+ Volume Name: vol
+ Type: Distributed-Replicate
+ Volume ID: 4f2d7849-fbd6-40a2-b346-d13420978a01
+ Status: Created
+ Number of Bricks: 4 x 2 = 8
+ Transport-type: tcp
+ Bricks:
+ brick-a: pranithk-laptop:/gfs/brick-a
+ brick-b: pranithk-laptop:/gfs/brick-b
+ brick-c: pranithk-laptop:/gfs/brick-c
+ brick-d: pranithk-laptop:/gfs/brick-d
+ brick-e: pranithk-laptop:/gfs/brick-e
+ brick-f: pranithk-laptop:/gfs/brick-f
+ brick-g: pranithk-laptop:/gfs/brick-g
+ brick-h: pranithk-laptop:/gfs/brick-h
+
+In the example above:
+```
+Brick | Replica set | Brick subvolume index
+----------------------------------------------------------------------------
+-/gfs/brick-a | 0 | 0
+-/gfs/brick-b | 0 | 1
+-/gfs/brick-c | 1 | 2
+-/gfs/brick-d | 1 | 3
+-/gfs/brick-e | 2 | 4
+-/gfs/brick-f | 2 | 5
+-/gfs/brick-g | 3 | 6
+-/gfs/brick-h | 3 | 7
+```
+
+Each file in a brick maintains the changelog of itself and that of the files
+present in all the other bricks in its replica set as seen by that brick.
+
+In the example volume given above, all files in brick-a will have 2 entries,
+one for itself and the other for the file present in its replica pair, i.e. brick-b:
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for itself (brick-a)
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for brick-b as seen by brick-a
+
+Likewise, all files in brick-b will have:
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for brick-a as seen by brick-b
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for itself (brick-b)
+
+The same can be extended for other replica pairs.
+
+Interpreting Changelog (roughly pending operation count) Value:
+Each extended attribute has a value which is 24 hexadecimal digits.
+First 8 digits represent changelog of data. Second 8 digits represent changelog
+of metadata. Last 8 digits represent Changelog of directory entries.
+
+Pictorially representing the same, we have:
+```
+0x 000003d7 00000001 00000000
+ | | |
+ | | \_ changelog of directory entries
+ | \_ changelog of metadata
+ \_ changelog of data
+```
+
+
+For directories, the metadata and entry changelogs are valid.
+For regular files, the data and metadata changelogs are valid.
+For special files (device files etc.), only the metadata changelog is valid.
+When a file split-brain happens it could be either data split-brain or
+meta-data split-brain or both. When a split-brain happens the changelog of the
+file would be something like this:
+
+* Example: (Let's consider both data and metadata split-brain on the same file.)
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000100000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000003b00000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+### Observations:
+
+#### According to changelog extended attributes on file /gfs/brick-a/a:
+The first 8 digits of trusted.afr.vol-client-0 are all
+zeros (0x00000000................), and the first 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x000003d7................).
+So the changelog on /gfs/brick-a/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are
+all zeros (0x........00000000........), and the second 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x........00000001........).
+So the changelog on /gfs/brick-a/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+#### According to changelog extended attributes on file /gfs/brick-b/a:
+The first 8 digits of trusted.afr.vol-client-0 are not all
+zeros (0x000003b0................), and the first 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x00000000................).
+So the changelog on /gfs/brick-b/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are not
+all zeros (0x........00000001........), and the second 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x........00000000........).
+So the changelog on /gfs/brick-b/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+Since both the copies have data, metadata changes that are not on the other
+file, it is in both data and metadata split-brain.
+
+Deciding on the correct copy:
+-----------------------------
+The user may have to inspect the stat and getfattr output of the files to decide
+which metadata to retain, and the contents of the file to decide which data to retain.
+Continuing with the example above, let's say we want to retain the data
+of /gfs/brick-a/a and metadata of /gfs/brick-b/a.
+
+Resetting the relevant changelogs to resolve the split-brain:
+-------------------------------------------------------------
+For resolving data-split-brain:
+We need to change the changelog extended attributes on the files as if some data
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a. But
+/gfs/brick-b/a should NOT have any changelog which says some data operations
+succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a. We need to reset the
+data part of the changelog on trusted.afr.vol-client-0 of /gfs/brick-b/a.
+
+For resolving metadata-split-brain:
+We need to change the changelog extended attributes on the files as if some
+metadata operations succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a.
+But /gfs/brick-a/a should NOT have any changelog which says some metadata
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a.
+We need to reset metadata part of the changelog on
+trusted.afr.vol-client-1 of /gfs/brick-a/a
+
+So, the intended changes are:
+On /gfs/brick-b/a:
+For trusted.afr.vol-client-0
+0x000003b00000000100000000 to 0x000000000000000100000000
+(Note that the metadata part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-0 -v 0x000000000000000100000000 /gfs/brick-b/a`
+
+On /gfs/brick-a/a:
+For trusted.afr.vol-client-1
+0x000003d70000000100000000 to 0x000003d70000000000000000
+(Note that the data part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-1 -v 0x000003d70000000000000000 /gfs/brick-a/a`
+
+Thus after the above operations are done, the changelogs look like this:
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000000000000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+
+Triggering Self-heal:
+---------------------
+Perform `ls -l <file-path-on-gluster-mount>` to trigger healing.
+
+Fixing Directory entry split-brain:
+----------------------------------
+Afr has the ability to conservatively merge different entries in the directories
+when there is a split-brain on directory.
+If on one brick directory 'd' has entries '1', '2' and has entries '3', '4' on
+the other brick then afr will merge all of the entries in the directory to have
+'1', '2', '3', '4' entries in the same directory.
+(Note: this may cause deleted files to reappear if the split-brain
+was caused by the deletion of files in the directory.)
+Split-brain resolution needs human intervention when there is at least one entry
+which has same file name but different gfid in that directory.
+Example:
+On brick-a the directory has entries '1' (with gfid g1), '2' and on brick-b
+directory has entries '1' (with gfid g2) and '3'.
+These kinds of directory split-brains need human intervention to resolve.
+The user needs to remove either file '1' on brick-a or the file '1' on brick-b
+to resolve the split-brain. In addition, the corresponding gfid-link file also
+needs to be removed. The gfid-link files are present in the .glusterfs folder
+in the top-level directory of the brick. If the gfid of the file is
+0x307a5c9efddd4e7c96e94fd4bcdcbd1b (the trusted.gfid extended attribute got
+from the getfattr command earlier), the gfid-link file can be found at
+> /gfs/brick-a/.glusterfs/30/7a/307a5c9efddd4e7c96e94fd4bcdcbd1b
+
+#### Word of caution:
+Before deleting the gfid-link, we have to ensure that there are no hard links
+to the file present on that brick. If hard links exist, they must be deleted as
+well.
diff --git a/error-codes.json b/error-codes.json
new file mode 100644
index 000000000..5121049d3
--- /dev/null
+++ b/error-codes.json
@@ -0,0 +1,4 @@
+{
+ "ERR_DEV": {"code": 9999,
+ "message": {"en": "devel error"}}
+}
diff --git a/extras/LinuxRPM/Makefile.am b/extras/LinuxRPM/Makefile.am
new file mode 100644
index 000000000..1dafa982b
--- /dev/null
+++ b/extras/LinuxRPM/Makefile.am
@@ -0,0 +1,57 @@
+
+GFS_TAR = ../../glusterfs-$(VERSION).tar.gz
+
+.PHONY: all
+
+all:
+ @echo "To build RPMS run 'make glusterrpms'"
+
+.PHONY: glusterrpms prep srcrpm testsrpm clean
+
+glusterrpms: prep srcrpm rpms
+ -rm -rf rpmbuild
+
+prep:
+ if [ ! -e $(GFS_TAR) ]; then \
+ $(MAKE) -C ../.. dist; \
+ fi
+ -mkdir -p rpmbuild/SPECS
+ -mkdir -p rpmbuild/RPMS
+ -mkdir -p rpmbuild/SRPMS
+ -rm -rf rpmbuild/SOURCES
+ @if [ -d /d/cache/glusterfs -a -e /d/cache/glusterfs/sources ]; then \
+ echo "copying glusterfs rpm files from local cache..." ; \
+ mkdir -p ./rpmbuild/SOURCES; \
+ cp /d/cache/glusterfs/* ./rpmbuild/SOURCES/ ; \
+ elif [ -x /usr/bin/git ]; then \
+ echo "fetching glusterfs rpm files from fedora git repo..."; \
+ cd ./rpmbuild && git clone git://pkgs.fedoraproject.org/glusterfs.git > /dev/null && mv glusterfs SOURCES; \
+ else \
+ echo "glusterfs rpm files not fetched, you don't have git installed!" ; \
+ exit 1 ; \
+ fi
+ cp ../../*.tar.gz ./rpmbuild/SOURCES
+ cp ../../glusterfs.spec ./rpmbuild/SPECS
+
+srcrpm:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* .
+
+rpms:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bb rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/RPMS/*/* .
+
+# EPEL-5 does not like new versions of rpmbuild and requires some
+# _source_* defines
+
+testsrpm: prep
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' \
+ --define '_source_payload w9.gzdio' \
+ --define '_source_filedigest_algorithm 1' \
+ -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* ../..
+ -rm -rf rpmbuild
+
+clean:
+ -rm -rf rpmbuild
+ -rm -f *.rpm
diff --git a/extras/Makefile.am b/extras/Makefile.am
index 0c260de92..cf619329b 100644
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -1,11 +1,20 @@
-docdir = $(datadir)/doc/glusterfs/
-EditorModedir = $(docdir)/
+EditorModedir = $(docdir)
EditorMode_DATA = glusterfs-mode.el glusterfs.vim
-SUBDIRS = init.d benchmarking hook-scripts
+SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM geo-rep
+
+confdir = $(sysconfdir)/glusterfs
+conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
+ logger.conf.example glusterfs-georep-logrotate
+
+voldir = $(sysconfdir)/glusterfs
+vol_DATA = glusterd.vol
EXTRA_DIST = specgen.scm MacOSX/Portfile glusterfs-mode.el glusterfs.vim \
migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \
backend-cleanup.sh disk_usage_sync.sh quota-remove-xattr.sh \
- quota-metadata-cleanup.sh glusterfs-logrotate
+ quota-metadata-cleanup.sh glusterfs-logrotate clear_xattrs.sh \
+ group-virt.example glusterd-sysconfig gluster-rsyslog-7.2.conf \
+ gluster-rsyslog-5.8.conf logger.conf.example glusterd.vol \
+ glusterfs-georep-logrotate
diff --git a/extras/benchmarking/Makefile.am b/extras/benchmarking/Makefile.am
index 04cc06182..bfcc59277 100644
--- a/extras/benchmarking/Makefile.am
+++ b/extras/benchmarking/Makefile.am
@@ -1,7 +1,5 @@
-docdir = $(datadir)/doc/$(PACKAGE_NAME)/benchmarking
-
-benchmarkingdir = $(docdir)
+benchmarkingdir = $(docdir)/benchmarking
benchmarking_DATA = rdd.c glfs-bm.c README launch-script.sh local-script.sh
diff --git a/extras/benchmarking/glfs-bm.c b/extras/benchmarking/glfs-bm.c
index 035d055df..dc717f33c 100644
--- a/extras/benchmarking/glfs-bm.c
+++ b/extras/benchmarking/glfs-bm.c
@@ -1,21 +1,12 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#define _GNU_SOURCE
#define __USE_FILE_OFFSET64
#define _FILE_OFFSET_BITS 64
diff --git a/extras/benchmarking/rdd.c b/extras/benchmarking/rdd.c
index b06333370..a667c6a1d 100644
--- a/extras/benchmarking/rdd.c
+++ b/extras/benchmarking/rdd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
diff --git a/extras/generate-xdr-files.sh b/extras/generate-xdr-files.sh
index e52321cd3..bc02f77c9 100755
--- a/extras/generate-xdr-files.sh
+++ b/extras/generate-xdr-files.sh
@@ -22,22 +22,13 @@ append_licence_header ()
cat >$dst_file <<EOF
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#include "xdr-common.h"
diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am
new file mode 100644
index 000000000..fc5f56d54
--- /dev/null
+++ b/extras/geo-rep/Makefile.am
@@ -0,0 +1,2 @@
+EXTRA_DIST = gsync-sync-gfid.c gsync-upgrade.sh generate-gfid-file.sh \
+ get-gfid.sh slave-upgrade.sh
diff --git a/extras/geo-rep/generate-gfid-file.sh b/extras/geo-rep/generate-gfid-file.sh
new file mode 100644
index 000000000..c6739fbf1
--- /dev/null
+++ b/extras/geo-rep/generate-gfid-file.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#Usage: generate-gfid-file.sh <master-volfile-server:master-volume> <path-to-get-gfid.sh> <output-file>
+
+function get_gfids()
+{
+ GET_GFID_CMD=$1
+ OUTPUT_FILE=$2
+ find . -exec $GET_GFID_CMD {} \; > $OUTPUT_FILE
+}
+
+# Mount volume $2 from volfile server $1 on a temporary directory, run the
+# gfid listing ($3, written to $4) from inside the mount, then unmount it.
+function mount_client()
+{
+    local T; # temporary mount
+    local i; # inode number
+
+    VOLFILE_SERVER=$1;
+    VOLUME=$2;
+    GFID_CMD=$3;
+    OUTPUT=$4;
+
+    T=$(mktemp -d);
+
+    glusterfs -s $VOLFILE_SERVER --volfile-id $VOLUME $T;
+
+    # inode 1 on $T is taken as proof that the volume is mounted; stop here
+    # otherwise, so that find never walks the empty local directory instead
+    i=$(stat -c '%i' $T);
+    [ "x$i" = "x1" ] || { echo "could not mount volume $VOLUME on $T" >&2; exit 1; }
+
+    cd $T;
+    get_gfids $GFID_CMD $OUTPUT
+    cd -;
+
+    umount $T || { echo "could not umount $VOLUME from $T" >&2; exit 1; }
+    rmdir $T || echo "rmdir of $T failed" >&2;
+}
+
+
+function main()
+{
+ SLAVE=$1
+ GET_GFID_CMD=$2
+ OUTPUT=$3
+
+ VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ mount_client $VOLFILE_SERVER $VOLUME_NAME $GET_GFID_CMD $OUTPUT
+}
+
+main "$@";
diff --git a/extras/geo-rep/get-gfid.sh b/extras/geo-rep/get-gfid.sh
new file mode 100755
index 000000000..a4d609b0b
--- /dev/null
+++ b/extras/geo-rep/get-gfid.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Usage: get-gfid.sh <path>   -- prints "<gfid> <path>" for one file
+ATTR_STR=`getfattr -h "$1" -n glusterfs.gfid.string`
+GLFS_PATH=`echo $ATTR_STR | sed -e 's/# file: \(.*\) glusterfs.gfid.string*/\1/g'`
+GFID=`echo $ATTR_STR | sed -e 's/.*glusterfs.gfid.string="\(.*\)"/\1/g'`
+# $ATTR_STR stays unquoted above: the sed patterns expect it joined on one line
+echo "$GFID $GLFS_PATH"
diff --git a/extras/geo-rep/gsync-sync-gfid.c b/extras/geo-rep/gsync-sync-gfid.c
new file mode 100644
index 000000000..601f4720e
--- /dev/null
+++ b/extras/geo-rep/gsync-sync-gfid.c
@@ -0,0 +1,151 @@
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <attr/xattr.h>
+#include <libgen.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+#ifndef UUID_CANONICAL_FORM_LEN
+#define UUID_CANONICAL_FORM_LEN 36
+#endif
+
+#ifndef GF_FUSE_AUX_GFID_HEAL
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+#endif
+
+#define GLFS_LINE_MAX (PATH_MAX + (2 * UUID_CANONICAL_FORM_LEN))
+
+/*
+ * For every "<gfid> <path>" record in the file named by argv[1], set the
+ * GF_FUSE_AUX_GFID_HEAL xattr on the parent directory of <path>; the value
+ * is the blob "<gfid>\0<basename>\0".
+ *
+ * Paths are passed to setxattr () as they appear in the file, so the program
+ * is expected to be started from the mount the paths are relative to.
+ * Malformed records and setxattr () failures are reported on stderr and
+ * skipped.
+ *
+ * Returns 0 once the whole file has been read; -1 on a usage error, when
+ * the file cannot be opened or when memory runs out.
+ */
+int
+main (int argc, char *argv[])
+{
+        char   *file                = NULL;
+        char   *dir_copy            = NULL;
+        char   *base_copy           = NULL;
+        char   *parent_dir          = NULL;
+        char   *gfid                = NULL;
+        char   *bname               = NULL;
+        char   *path                = NULL;
+        char   *blob                = NULL;
+        size_t  line_len            = 0;
+        size_t  blob_len            = 0;
+        int     ret                 = -1;
+        FILE   *fp                  = NULL;
+        char    line[GLFS_LINE_MAX] = {0,};
+
+        if (argc != 2) {
+                /* each line in the file has the following format
+                 * uuid-in-canonical-form path-relative-to-gluster-mount.
+                 * Both uuid and relative path are from master mount.
+                 */
+                fprintf (stderr, "usage: %s <file-of-paths-to-be-synced>\n",
+                         argv[0]);
+                goto out;
+        }
+
+        file = argv[1];
+
+        fp = fopen (file, "r");
+        if (fp == NULL) {
+                fprintf (stderr, "cannot open %s for reading (%s)\n",
+                         file, strerror (errno));
+                goto out;
+        }
+
+        while (fgets (line, sizeof (line), fp) != NULL) {
+                line_len = strlen (line);
+                if ((line_len > 0) && (line[line_len - 1] == '\n'))
+                        line[--line_len] = '\0';
+
+                /* a record needs a full gfid, one separator and a path;
+                 * on anything shorter 'path' would point past the data
+                 */
+                if (line_len <= UUID_CANONICAL_FORM_LEN + 1) {
+                        if (line_len > 0)
+                                fprintf (stderr, "skipping malformed line "
+                                         "'%s'\n", line);
+                        continue;
+                }
+
+                gfid = line;
+                line[UUID_CANONICAL_FORM_LEN] = '\0';
+
+                path = line + UUID_CANONICAL_FORM_LEN + 1;
+                /* <ctype.h> functions need an unsigned char value */
+                while (isspace ((unsigned char) *path))
+                        path++;
+                if (*path == '\0') {
+                        fprintf (stderr, "no path given for gfid %s\n", gfid);
+                        continue;
+                }
+
+                /* dirname () and basename () may modify their argument,
+                 * so each one works on its own copy of the path
+                 */
+                dir_copy = strdup (path);
+                base_copy = strdup (path);
+                if ((dir_copy == NULL) || (base_copy == NULL)) {
+                        fprintf (stderr, "out of memory\n");
+                        ret = -1;
+                        goto out;
+                }
+                parent_dir = dirname (dir_copy);
+                bname = basename (base_copy);
+
+                /* gfid + '\0' + bname + '\0'; calloc () zero-fills the
+                 * buffer, which provides both terminators
+                 */
+                blob_len = UUID_CANONICAL_FORM_LEN + 1 + strlen (bname) + 1;
+                blob = calloc (1, blob_len);
+                if (blob == NULL) {
+                        fprintf (stderr, "out of memory\n");
+                        ret = -1;
+                        goto out;
+                }
+                memcpy (blob, gfid, UUID_CANONICAL_FORM_LEN);
+                memcpy (blob + UUID_CANONICAL_FORM_LEN + 1, bname,
+                        strlen (bname));
+
+                /* best effort: report the failure and go on to the next
+                 * record
+                 */
+                ret = setxattr (parent_dir, GF_FUSE_AUX_GFID_HEAL, blob,
+                                blob_len, 0);
+                if (ret < 0) {
+                        fprintf (stderr, "setxattr on %s/%s failed (%s)\n",
+                                 parent_dir, bname, strerror (errno));
+                }
+
+                free (blob);
+                free (dir_copy);
+                free (base_copy);
+                blob = dir_copy = base_copy = NULL;
+        }
+
+        ret = 0;
+out:
+        /* free (NULL) is a no-op, so this is safe on every path */
+        free (blob);
+        free (dir_copy);
+        free (base_copy);
+        if (fp != NULL)
+                fclose (fp);
+        return ret;
+}
+
diff --git a/extras/geo-rep/gsync-upgrade.sh b/extras/geo-rep/gsync-upgrade.sh
new file mode 100644
index 000000000..b17948736
--- /dev/null
+++ b/extras/geo-rep/gsync-upgrade.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+#usage: gsync-upgrade.sh <slave-volfile-server:slave-volume> <gfid-file>
+# <path-to-gsync-sync-gfid> <ssh-identity-file>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+#
+#<ssh-identity-file>: file from which the identity (private key) for public key authentication is read.
+
+SLAVE_MOUNT='/tmp/glfs_slave'
+
+function SSH()
+{
+ HOST=$1
+ SSHKEY=$2
+
+ shift 2
+
+ ssh -qi $SSHKEY \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ "$HOST" "$@";
+}
+
+function get_bricks()
+{
+ SSHKEY=$3
+
+ SSH $1 $SSHKEY "gluster volume info $2" | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+    HOST=$1
+    BRICK=$2
+    SSHKEY=$3
+    # SSH() expects <host> <identity-file> <command...> and adds -i itself
+    # TODO: write a C program to receive a list of files and does cleanup on
+    # them instead of spawning a new setfattr process for each file if
+    # performance is bad.
+    SSH $HOST $SSHKEY "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \;"
+}
+
+function cleanup_slave()
+{
+    SSHKEY=$2
+
+    VOLFILE_SERVER=`echo $1 | sed -e 's/\(.*\):.*/\1/'`
+    VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+    BRICKS=`get_bricks $VOLFILE_SERVER $VOLUME_NAME $SSHKEY`
+
+    for i in $BRICKS; do
+        HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+        BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+        cleanup_brick $HOST $BRICK $SSHKEY
+    done
+
+    # restart the volume; SSH() takes <host> <identity-file> <command...>
+    SSH $VOLFILE_SERVER $SSHKEY "gluster --mode=script volume stop $VOLUME_NAME; gluster volume start $VOLUME_NAME";
+}
+
+# Mount volume $2 from volfile server $1 with --aux-gfid-mount on a temporary
+# directory and run "$4 $3" (sync command, gfid file) from inside the mount.
+function mount_client()
+{
+    local T; # temporary mount
+    local i; # inode number
+    GFID_FILE=$3
+    SYNC_CMD=$4
+
+    T=$(mktemp -d);
+
+    glusterfs --aux-gfid-mount -s $1 --volfile-id $2 $T;
+
+    # inode 1 on $T is taken as proof that the volume is mounted; stop here
+    # otherwise, so that the sync command never runs on the local directory
+    i=$(stat -c '%i' $T);
+    [ "x$i" = "x1" ] || { echo "could not mount volume $2 on $T" >&2; exit 1; }
+
+    cd $T;
+    $SYNC_CMD $GFID_FILE
+    cd -;
+
+    umount -l $T || { echo "could not umount $2 from $T" >&2; exit 1; }
+    rmdir $T || echo "rmdir of $T failed" >&2;
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $3
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+ SSHKEY=$4
+
+ cleanup_slave $SLAVE $SSHKEY
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/geo-rep/slave-upgrade.sh b/extras/geo-rep/slave-upgrade.sh
new file mode 100644
index 000000000..6198f408a
--- /dev/null
+++ b/extras/geo-rep/slave-upgrade.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+#usage: slave-upgrade.sh <volfile-server:volname> <gfid-file>
+# <path-to-gsync-sync-gfid>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+
+function get_bricks()
+{
+ gluster volume info $1 | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+ HOST=$1
+ BRICK=$2
+
+ # TODO: write a C program to receive a list of files and does cleanup on
+ # them instead of spawning a new setfattr process for each file if
+ # performance is bad.
+ ssh $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \; 2>/dev/null"
+}
+
+function cleanup_slave()
+{
+ VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+ BRICKS=`get_bricks $VOLUME_NAME`
+
+ for i in $BRICKS; do
+ HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+ BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+ cleanup_brick $HOST $BRICK
+ done
+
+ # Now restart the volume
+ gluster --mode=script volume stop $VOLUME_NAME;
+ gluster volume start $VOLUME_NAME;
+}
+
+# Mount volume $2 from volfile server $1 with --aux-gfid-mount on a temporary
+# directory and run "$4 $3" (sync command, gfid file) from inside the mount.
+function mount_client()
+{
+    local T; # temporary mount
+    local i; # inode number
+
+    VOLUME_NAME=$2;
+    GFID_FILE=$3
+    SYNC_CMD=$4
+
+    T=$(mktemp -d);
+    glusterfs --aux-gfid-mount -s $1 --volfile-id $VOLUME_NAME $T;
+
+    # stop unless $T has inode 1, which is taken as proof of a live mount
+    i=$(stat -c '%i' $T);
+    [ "x$i" = "x1" ] || { echo "could not mount volume $VOLUME_NAME on $T" >&2; exit 1; }
+
+    cd $T;
+    $SYNC_CMD $GFID_FILE
+    cd -;
+
+    umount $T || { echo "could not umount $VOLUME_NAME from $T" >&2; exit 1; }
+    rmdir $T || echo "rmdir of $T failed" >&2;
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $SYNC_CMD
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ cleanup_slave $SLAVE
+
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/gluster-rsyslog-5.8.conf b/extras/gluster-rsyslog-5.8.conf
new file mode 100644
index 000000000..2519999bc
--- /dev/null
+++ b/extras/gluster-rsyslog-5.8.conf
@@ -0,0 +1,51 @@
+##### gluster.conf #####
+
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named '$msgid'
+#
+$ModLoad mmcount
+$mmcountKey gf_code # start counting value of gf_code
+
+$template Glusterfsd_dynLogFile,"/var/log/glusterfs/bricks/%app-name%.log"
+$template Gluster_dynLogFile,"/var/log/glusterfs/%app-name%.log"
+
+$template GLFS_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+
+#
+## Pass logs to mmcount if app-name is 'gluster'
+#
+if $app-name contains 'gluster' then :mmcount:
+
+if $app-name contains 'glusterfsd' then ?Glusterfsd_dynLogFile;GLFS_Template
+if $app-name contains 'gluster' and not ( $app-name contains 'glusterfsd' ) then ?Gluster_dynLogFile;GLFS_Template
+
+#
+## Sample configuration to send a email alert for every 50th mmcount
+#
+#$ModLoad ommail
+#$ActionMailSMTPServer smtp.example.com
+#$ActionMailFrom rsyslog@example.com
+#$ActionMailTo glusteradmin@example.com
+#$template mailSubject,"50th message of gf_code=9999 on %hostname%"
+#$template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+#$ActionMailSubject mailSubject
+#$ActionExecOnlyOnceEveryInterval 30
+#if $app-name == 'glusterfsd' and $msgid != 0 and $msgid % 50 == 0 \
+#then :ommail:;RSYSLOG_SyslogProtocol23Format
+#
+
+#
+## discard logs where app-name is 'gluster' as we processed already
+#
+if $app-name contains 'gluster' then ~
diff --git a/extras/gluster-rsyslog-7.2.conf b/extras/gluster-rsyslog-7.2.conf
new file mode 100644
index 000000000..8b2841543
--- /dev/null
+++ b/extras/gluster-rsyslog-7.2.conf
@@ -0,0 +1,76 @@
+##### gluster.conf #####
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+$ModLoad mmjsonparse
+*.* :mmjsonparse:
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named 'mmcount'
+##
+## More info at http://www.rsyslog.com/doc/mmcount.html
+#
+#module(load="mmcount")
+#action(type="mmcount" appname="glusterd" key="!gf_code") # count each value of gf_code of appname glusterd
+#action(type="mmcount" appname="glusterfsd" key="!gf_code") # count each value of gf_code of appname glusterfsd
+#action(type="mmcount" appname="glusterfs" key="!gf_code") # count each value of gf_code of appname glusterfs
+
+template (name="Glusterfsd_dynLogFile" type="string" string="/var/log/glusterfs/bricks/%app-name%.log")
+template (name="Gluster_dynLogFile" type="string" string="/var/log/glusterfs/%app-name%.log")
+
+template(name="GLFS_template" type="list") {
+ property(name="$!mmcount")
+ constant(value="/")
+ property(name="syslogfacility-text" caseConversion="upper")
+ constant(value="/")
+ property(name="syslogseverity-text" caseConversion="upper")
+ constant(value=" ")
+ constant(value="[")
+ property(name="timereported" dateFormat="rfc3339")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_code")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_message")
+ constant(value="] ")
+ property(name="$!msg")
+ constant(value="\n")
+}
+
+if $app-name contains 'glusterfsd' then {
+ action(type="omfile"
+ DynaFile="Glusterfsd_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+if $app-name contains 'gluster' then {
+ action(type="omfile"
+ DynaFile="Gluster_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+#
+## send email for every 50th mmcount
+#$ModLoad ommail
+#if $app-name == 'glusterfsd' and $!mmcount <> 0 and $!mmcount % 50 == 0 then {
+# $ActionMailSMTPServer smtp.example.com
+# $ActionMailFrom rsyslog@example.com
+# $ActionMailTo glusteradmin@example.com
+# $template mailSubject,"50th message of gf_code=9999 on %hostname%"
+# $template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+# $ActionMailSubject mailSubject
+# $ActionExecOnlyOnceEveryInterval 30
+# :ommail:;RSYSLOG_SyslogProtocol23Format
+#}
+#
diff --git a/extras/glusterd-sysconfig b/extras/glusterd-sysconfig
new file mode 100644
index 000000000..8237c5711
--- /dev/null
+++ b/extras/glusterd-sysconfig
@@ -0,0 +1,6 @@
+## Set custom log file and log level (below are defaults)
+# LOG_FILE='/var/log/glusterfs/glusterd.log'
+# LOG_LEVEL='INFO'
+
+## Set custom options for glusterd
+# GLUSTERD_OPTIONS=''
diff --git a/doc/glusterd.vol b/extras/glusterd.vol
index 809042cbd..9bac52ab7 100644
--- a/doc/glusterd.vol
+++ b/extras/glusterd.vol
@@ -1,8 +1,9 @@
volume management
type mgmt/glusterd
- option working-directory /etc/glusterd
+ option working-directory /var/lib/glusterd
option transport-type socket,rdma
option transport.socket.keepalive-time 10
option transport.socket.keepalive-interval 2
option transport.socket.read-fail-log off
+# option base-port 49152
end-volume
diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate
new file mode 100644
index 000000000..6a69ab1e3
--- /dev/null
+++ b/extras/glusterfs-georep-logrotate
@@ -0,0 +1,18 @@
+
+rotate 52
+missingok
+
+compress
+delaycompress
+notifempty
+
+/var/log/glusterfs/geo-replication/*/*.log {
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*.log {
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*/*.log {
+}
diff --git a/extras/glusterfs.vim b/extras/glusterfs.vim
index 62102c1ee..899cc6551 100644
--- a/extras/glusterfs.vim
+++ b/extras/glusterfs.vim
@@ -1,20 +1,11 @@
" glusterfs.vim: GNU Vim Syntax file for GlusterFS .vol specification
-" Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
-" This file is part of GlusterFS.
+" Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+" This file is part of GlusterFS.
"
-" GlusterFS is free software; you can redistribute it and/or modify
-" it under the terms of the GNU General Public License as published
-" by the Free Software Foundation; either version 3 of the License,
-" or (at your option) any later version.
-"
-" GlusterFS is distributed in the hope that it will be useful, but
-" WITHOUT ANY WARRANTY; without even the implied warranty of
-" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-" General Public License for more details.
-"
-" You should have received a copy of the GNU General Public License
-" along with this program. If not, see
-" <http://www.gnu.org/licenses/>.
+" This file is licensed to you under your choice of the GNU Lesser
+" General Public License, version 3 or any later version (LGPLv3 or
+" later), or the GNU General Public License, version 2 (GPLv2), in all
+" cases as published by the Free Software Foundation.
"
" Last Modified: Wed Aug 1 00:47:10 IST 2007
" Version: 0.8
diff --git a/extras/gnfs-loganalyse.py b/extras/gnfs-loganalyse.py
index 6b24e5a71..71e79b6be 100644
--- a/extras/gnfs-loganalyse.py
+++ b/extras/gnfs-loganalyse.py
@@ -1,21 +1,13 @@
#!/bin/python
"""
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
"""
import os
diff --git a/extras/group-virt.example b/extras/group-virt.example
new file mode 100644
index 000000000..7dc777f2d
--- /dev/null
+++ b/extras/group-virt.example
@@ -0,0 +1,6 @@
+quick-read=off
+read-ahead=off
+io-cache=off
+stat-prefetch=off
+eager-lock=enable
+remote-dio=enable
diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am
index 5c6249de7..f6bded20c 100644
--- a/extras/hook-scripts/Makefile.am
+++ b/extras/hook-scripts/Makefile.am
@@ -1 +1 @@
-EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S30samba-stop.sh
+EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S30samba-stop.sh S30samba-set.sh S56glusterd-geo-rep-create-post.sh
diff --git a/extras/hook-scripts/S30samba-set.sh b/extras/hook-scripts/S30samba-set.sh
new file mode 100755
index 000000000..6b11f5a4f
--- /dev/null
+++ b/extras/hook-scripts/S30samba-set.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+#Need to be copied to hooks/<HOOKS_VER>/set/post/
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script enables user to enable or disable smb share by volume set
+#option. Keys "user.cifs" and "user.smb" both are valid, but user.smb is
+#preferred.
+
+
+PROGNAME="Ssamba-set"
+OPTSPEC="volname:"
+VOL=
+
+enable_smb=""
+
+function parse_args () {
+ ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ *)
+ shift
+ for pair in $@; do
+ read key value < <(echo "$pair" | tr "=" " ")
+ case "$key" in
+ "user.cifs")
+ enable_smb=$value
+ ;;
+ "user.smb")
+ enable_smb=$value
+ ;;
+ *)
+ ;;
+ esac
+ done
+
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+function add_samba_share () {
+ volname=$1
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = /var/log/samba/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="guest ok = yes\n"
+ printf "$STRING" >> /etc/samba/smb.conf
+}
+
+function sighup_samba () {
+ pid=`cat /var/run/smbd.pid`
+ if [ "x$pid" != "x" ]
+ then
+ kill -HUP "$pid";
+ else
+ /etc/init.d/smb start
+ fi
+}
+
+function del_samba_share () {
+ volname=$1
+ sed -i "/\[gluster-$volname\]/,/^$/d" /etc/samba/smb.conf
+}
+
+function is_volume_started () {
+ volname=$1
+ echo "$(grep status /var/lib/glusterd/vols/"$volname"/info |\
+ cut -d"=" -f2)"
+}
+
+parse_args $@
+# quoted: is_volume_started prints nothing when the info file has no status
+if [ "0" = "$(is_volume_started "$VOL")" ]; then
+    exit 0
+fi
+
+if [ "$enable_smb" = "enable" ]; then
+    if ! grep --quiet "\[gluster-$VOL\]" /etc/samba/smb.conf ; then
+        add_samba_share $VOL
+        sighup_samba
+    fi
+elif [ "$enable_smb" = "disable" ]; then
+    del_samba_share $VOL
+    sighup_samba
+fi
diff --git a/extras/hook-scripts/S30samba-start.sh b/extras/hook-scripts/S30samba-start.sh
index a42bb07eb..34fde0ef8 100755
--- a/extras/hook-scripts/S30samba-start.sh
+++ b/extras/hook-scripts/S30samba-start.sh
@@ -1,12 +1,31 @@
#!/bin/bash
+
#Need to be copied to hooks/<HOOKS_VER>/start/post
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically creates shares for volume on every volume start
+#event by adding the entries in smb.conf file and sending SIGHUP to samba.
+#
+#In smb.conf:
+#glusterfs vfs plugin has to be specified as required vfs object.
+#Path value is relative to the root of gluster volume;"/" signifies complete
+#volume.
+
PROGNAME="Ssamba-start"
-OPTSPEC="volname:,mnt:"
+OPTSPEC="volname:"
VOL=
-#FIXME: glusterd hook interface will eventually provide mntpt prefix as
-# command line arg
-MNT_PRE="/mnt/samba"
+CONFIGFILE=
+LOGFILEBASE=
+PIDDIR=
function parse_args () {
ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
@@ -18,10 +37,6 @@ function parse_args () {
shift
VOL=$1
;;
- --mnt)
- shift
- MNT_PRE=$1
- ;;
*)
shift
break
@@ -31,32 +46,65 @@ function parse_args () {
done
}
-function add_samba_export () {
- volname=$1
- mnt_pre=$2
- mkdir -p $mnt_pre/$volname && \
- printf "\n[gluster-$volname]\ncomment=For samba export of volume $volname\npath=$mnt_pre/$volname\nread only=no\nguest ok=yes\n" >> /etc/samba/smb.conf
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ];then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk {'print $2'}`
+ PIDDIR=`smbd -b | grep PIDDIR | awk {'print $2'}`
+ LOGFILEBASE=`smbd -b | grep 'LOGFILEBASE' | awk '{print $2}'`
}
-function mount_volume () {
+function add_samba_share () {
volname=$1
- mnt_pre=$2
- #Mount shouldn't block on glusterd to fetch volfile, hence the 'bg'
- mount -t glusterfs `hostname`:$volname $mnt_pre/$volname &
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = $LOGFILEBASE/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="guest ok = yes\n"
+ printf "$STRING" >> ${CONFIGFILE}
}
function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ $pid != "" ]
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
then
- kill -HUP $pid;
+ kill -HUP "$pid";
else
- /etc/init.d/smb start
+ /etc/init.d/smb condrestart
fi
}
+function get_smb () {
+ volname=$1
+ uservalue=
+
+ usercifsvalue=$(grep user.cifs /var/lib/glusterd/vols/"$volname"/info |\
+ cut -d"=" -f2)
+ usersmbvalue=$(grep user.smb /var/lib/glusterd/vols/"$volname"/info |\
+ cut -d"=" -f2)
+
+ if [[ $usercifsvalue = "disable" || $usersmbvalue = "disable" ]]; then
+ uservalue="disable"
+ fi
+ echo "$uservalue"
+}
parse_args $@
-add_samba_export $VOL $MNT_PRE
-mount_volume $VOL $MNT_PRE
-sighup_samba
+# get_smb prints an empty string unless sharing is disabled, hence the quotes
+if [ "$(get_smb "$VOL")" = "disable" ]; then
+    exit 0
+fi
+#Find smb.conf, smbd pid directory and smbd logfile path
+find_config_info
+
+if ! grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then
+ add_samba_share $VOL
+ sighup_samba
+fi
diff --git a/extras/hook-scripts/S30samba-stop.sh b/extras/hook-scripts/S30samba-stop.sh
index 0e483bff8..8950eea43 100755
--- a/extras/hook-scripts/S30samba-stop.sh
+++ b/extras/hook-scripts/S30samba-stop.sh
@@ -1,11 +1,25 @@
#! /bin/bash
-#Need to be copied to hooks/<HOOKS_VER>/stop/post
+
+#Need to be copied to hooks/<HOOKS_VER>/stop/pre
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically removes shares for volume on every volume stop
+#event by removing the volume related entries(if any) in smb.conf file.
PROGNAME="Ssamba-stop"
-OPTSPEC="volname:,mnt:"
+OPTSPEC="volname:"
VOL=
-#FIXME: gluster will eventually pass mnt prefix as command line argument
-MNT_PRE="/mnt/samba"
+CONFIGFILE=
+PIDDIR=
function parse_args () {
ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
@@ -17,11 +31,6 @@ function parse_args () {
shift
VOL=$1
;;
- --mnt)
- shift
- MNT_PRE=$1
- echo $1
- ;;
*)
shift
break
@@ -31,30 +40,32 @@ function parse_args () {
done
}
-function del_samba_export () {
- volname=$1
- cp /etc/samba/smb.conf /tmp/smb.conf
- sed -i "/gluster-$volname/,/^$/d" /tmp/smb.conf &&\
- mv /tmp/smb.conf /etc/samba/smb.conf
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ];then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk {'print $2'}`
+ PIDDIR=`smbd -b | grep PIDDIR | awk {'print $2'}`
}
-function umount_volume () {
+function del_samba_share () {
volname=$1
- mnt_pre=$2
- umount -l $mnt_pre/$volname
+ sed -i "/\[gluster-$volname\]/,/^$/d" ${CONFIGFILE}
}
function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ $pid != "" ]
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
then
kill -HUP $pid;
else
- /etc/init.d/smb start
+ /etc/init.d/smb condrestart
fi
}
parse_args $@
-del_samba_export $VOL
-umount_volume $VOL $MNT_PRE
+find_config_info
+del_samba_share $VOL
sighup_samba
diff --git a/extras/hook-scripts/S40ufo-stop.py b/extras/hook-scripts/S40ufo-stop.py
new file mode 100755
index 000000000..107f19683
--- /dev/null
+++ b/extras/hook-scripts/S40ufo-stop.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+import os
+from optparse import OptionParser
+
+if __name__ == '__main__':
+    # Imported up front: both failure paths below call sys.exit().
+    import sys
+    # check if swift is installed
+    try:
+        from gluster.swift.common.Glusterfs import get_mnt_point, unmount
+    except ImportError:
+        sys.exit("Openstack Swift does not appear to be installed properly")
+
+    op = OptionParser(usage="%prog [options...]")
+    op.add_option('--volname', dest='vol', type=str)
+    op.add_option('--last', dest='last', type=str)
+    (opts, args) = op.parse_args()
+
+    mnt_point = get_mnt_point(opts.vol)
+    if mnt_point:
+        unmount(mnt_point)
+    else:
+        sys.exit("get_mnt_point returned none for mount point")
diff --git a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
new file mode 100755
index 000000000..1369c22fc
--- /dev/null
+++ b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+key_val_pair1=`echo $2 | cut -d ' ' -f 1`
+key_val_pair2=`echo $2 | cut -d ' ' -f 2`
+key_val_pair3=`echo $2 | cut -d ' ' -f 3`
+
+key=`echo $key_val_pair1 | cut -d '=' -f 1`
+val=`echo $key_val_pair1 | cut -d '=' -f 2`
+if [ "$key" != "is_push_pem" ]; then
+ exit;
+fi
+if [ "$val" != '1' ]; then
+ exit;
+fi
+
+key=`echo $key_val_pair2 | cut -d '=' -f 1`
+val=`echo $key_val_pair2 | cut -d '=' -f 2`
+if [ "$key" != "pub_file" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+pub_file=`echo $val`
+pub_file_tmp=`echo $val`_tmp
+
+key=`echo $key_val_pair3 | cut -d '=' -f 1`
+val=`echo $key_val_pair3 | cut -d '=' -f 2`
+if [ "$key" != "slave_ip" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slave_ip=`echo $val`
+
+if [ -f $pub_file ]; then
+ scp $pub_file $slave_ip:$pub_file_tmp
+ ssh $slave_ip "mv $pub_file_tmp $pub_file"
+ ssh $slave_ip "gluster system:: copy file /geo-replication/common_secret.pem.pub > /dev/null"
+ ssh $slave_ip "gluster system:: execute add_secret_pub > /dev/null"
+fi
diff --git a/extras/init.d/Makefile.am b/extras/init.d/Makefile.am
index 66715f431..38898fddd 100644
--- a/extras/init.d/Makefile.am
+++ b/extras/init.d/Makefile.am
@@ -1,19 +1,22 @@
-EXTRA_DIST = glusterd-Debian glusterd-Redhat glusterd-SuSE glusterd.plist
+EXTRA_DIST = glusterd-Debian glusterd-Redhat glusterd-SuSE glusterd.plist rhel5-load-fuse.modules
CLEANFILES =
-initdir = @initdir@
-launchddir = @launchddir@
+INIT_DIR = @initdir@
+SYSTEMD_DIR = @systemddir@
+LAUNCHD_DIR = @launchddir@
$(GF_DISTRIBUTION):
- $(mkdir_p) $(DESTDIR)$(initdir)
- $(INSTALL_PROGRAM) glusterd-$(GF_DISTRIBUTION) $(DESTDIR)$(initdir)/glusterd
+ @if [ ! -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(INIT_DIR); \
+ $(INSTALL_PROGRAM) glusterd-$(GF_DISTRIBUTION) $(DESTDIR)$(INIT_DIR)/glusterd; \
+ fi
install-exec-local: $(GF_DISTRIBUTION)
install-data-local:
if GF_DARWIN_HOST_OS
- $(mkdir_p) $(DESTDIR)$(launchddir)
- $(INSTALL_PROGRAM) glusterd.plist $(DESTDIR)$(launchddir)/com.gluster.glusterd.plist
+ $(mkdir_p) $(DESTDIR)$(LAUNCHD_DIR)
+ $(INSTALL_PROGRAM) glusterd.plist $(DESTDIR)$(LAUNCHD_DIR)/com.gluster.glusterd.plist
endif
diff --git a/extras/init.d/glusterd-Redhat.in b/extras/init.d/glusterd-Redhat.in
index 18f3debc4..e320708ae 100755
--- a/extras/init.d/glusterd-Redhat.in
+++ b/extras/init.d/glusterd-Redhat.in
@@ -1,76 +1,142 @@
#!/bin/bash
#
-# chkconfig: 35 20 80
-# description: Gluster File System service for volume management
+# glusterd Startup script for the glusterfs server
#
+# chkconfig: - 20 80
+# description: Clustered file-system server
-# Get function from functions library
+### BEGIN INIT INFO
+# Provides: glusterd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Should-Start:
+# Should-Stop:
+# Default-Start:
+# Default-Stop: 0 1 2 3 4 5 6
+# Short-Description: glusterfs server
+# Description: Clustered file-system server
+### END INIT INFO
+#
+
+# Source function library.
. /etc/rc.d/init.d/functions
BASE=glusterd
-PIDFILE=/var/run/$BASE.pid
+
+# Fedora File System Layout dictates /run
+[ -e /run ] && RUNDIR="/run"
+PIDFILE="${RUNDIR:-/var/run}/${BASE}.pid"
+
PID=`test -f $PIDFILE && cat $PIDFILE`
+
+# Overwriteable from sysconfig
+LOG_LEVEL=''
+LOG_FILE=''
+GLUSTERD_OPTIONS=''
+GLUSTERD_NOFILE='65536'
+
+[ -f /etc/sysconfig/${BASE} ] && . /etc/sysconfig/${BASE}
+
+[ ! -z $LOG_LEVEL ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-level ${LOG_LEVEL}"
+[ ! -z $LOG_FILE ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-file ${LOG_FILE}"
+
GLUSTERFSD=glusterfsd
GLUSTERFS=glusterfs
GLUSTERD_BIN=@prefix@/sbin/$BASE
-GLUSTERD_OPTS="--pid-file=$PIDFILE"
+GLUSTERD_OPTS="--pid-file=$PIDFILE ${GLUSTERD_OPTIONS}"
GLUSTERD="$GLUSTERD_BIN $GLUSTERD_OPTS"
RETVAL=0
+LOCKFILE=/var/lock/subsys/${BASE}
+
# Start the service $BASE
start()
{
- pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null
- status=$?
- if [ $status -eq 0 ]; then
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
echo "glusterd service is already running with pid $PID"
- exit 1
+ return 0
else
+ ulimit -n $GLUSTERD_NOFILE
echo -n $"Starting $BASE:"
daemon $GLUSTERD
RETVAL=$?
echo
- [ $RETVAL -ne 0 ] && exit $RETVAL
+ [ $RETVAL -eq 0 ] && touch $LOCKFILE
+ return $RETVAL
fi
-
}
# Stop the service $BASE
stop()
{
echo -n $"Stopping $BASE:"
- pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null
- status=$?
- if [ $status -eq 0 ]; then
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
killproc -p $PIDFILE $BASE
- [ -w $PIDFILE ] && rm -f $PIDFILE
else
killproc $BASE
fi
+ RETVAL=$?
+ [ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+ return $RETVAL
+}
+
+restart()
+{
+ stop
+ start
+}
- echo
+reload()
+{
+ restart
+}
+
+force_reload()
+{
+ restart
+}
+
+rh_status()
+{
+ status $BASE
+}
+rh_status_q()
+{
+ rh_status &>/dev/null
}
### service arguments ###
case $1 in
start)
- start
+ rh_status_q && exit 0
+ $1
;;
stop)
- stop
+ rh_status_q || exit 0
+ $1
+ ;;
+ restart)
+ $1
+ ;;
+ reload)
+ rh_status_q || exit 7
+ $1
+ ;;
+ force-reload)
+ force_reload
;;
status)
- status $BASE
+ rh_status
;;
- restart)
- $0 stop
- $0 start
+ condrestart|try-restart)
+ rh_status_q || exit 0
+ restart
;;
*)
- echo $"Usage: $0 {start|stop|status|restart}."
+ echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
exit 1
esac
-exit 0
+exit $?
diff --git a/extras/init.d/glusterd-SuSE.in b/extras/init.d/glusterd-SuSE.in
index 16cf8de6a..6259bab00 100755
--- a/extras/init.d/glusterd-SuSE.in
+++ b/extras/init.d/glusterd-SuSE.in
@@ -2,8 +2,8 @@
#
### BEGIN INIT INFO
# Provides: glusterd
-# Required-Start: $local_fs $network
-# Required-Stop:
+# Required-Start: $remote_fs $network
+# Required-Stop: $remote_fs $network
# Default-Start: 3 5
# Default-Stop:
# Short-Description: Gluster File System service for volume management
@@ -61,13 +61,17 @@ case $1 in
fi
rc_status -v
;;
+ reload)
+ rc_failed 3
+ rc_status -v
+ ;;
restart)
$0 stop
$0 start
rc_status
;;
*)
- echo $"Usage: $0 {start|stop|status|restart}."
+ echo $"Usage: $0 {start|stop|status|reload|restart}."
exit 1
esac
diff --git a/extras/init.d/rhel5-load-fuse.modules b/extras/init.d/rhel5-load-fuse.modules
new file mode 100755
index 000000000..ee194db99
--- /dev/null
+++ b/extras/init.d/rhel5-load-fuse.modules
@@ -0,0 +1,7 @@
+#!/bin/sh
+#
+# fusermount-glusterfs requires the /dev/fuse character device. The fuse module
+# provides this and is loaded on demand in newer Linux distributions.
+#
+
+[ -c /dev/fuse ] || /sbin/modprobe fuse
diff --git a/extras/logger.conf.example b/extras/logger.conf.example
new file mode 100644
index 000000000..248be5bda
--- /dev/null
+++ b/extras/logger.conf.example
@@ -0,0 +1,13 @@
+#
+# Sample logger.conf file to configure enhanced Logging in GlusterFS
+#
+# To enable enhanced logging capabilities,
+#
+# 1. rename this file to /etc/glusterfs/logger.conf
+#
+# 2. rename /etc/rsyslog.d/gluster.conf.example to
+# /etc/rsyslog.d/gluster.conf
+#
+# This change requires restart of all gluster services/volumes and
+# rsyslog.
+#
diff --git a/extras/ocf/Makefile.am b/extras/ocf/Makefile.am
new file mode 100644
index 000000000..c49a835fb
--- /dev/null
+++ b/extras/ocf/Makefile.am
@@ -0,0 +1,11 @@
+EXTRA_DIST = glusterd.in volume.in
+
+# The root of the OCF resource agent hierarchy
+# Per the OCF standard, it's always "lib",
+# not "lib64" (even on 64-bit platforms).
+ocfdir = $(prefix)/lib/ocf
+
+# The glusterfs provider directory
+radir = $(ocfdir)/resource.d/$(PACKAGE_NAME)
+
+ra_SCRIPTS = glusterd volume
diff --git a/extras/ocf/glusterd.in b/extras/ocf/glusterd.in
new file mode 100755
index 000000000..c119a285d
--- /dev/null
+++ b/extras/ocf/glusterd.in
@@ -0,0 +1,212 @@
+#!/bin/sh
+#
+# glusterd
+#
+# Description: Manages a glusterd server as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+#######################################################################
+
+
+OCF_RESKEY_binary_default="glusterd"
+OCF_RESKEY_pid_default="@localstatedir@/run/glusterd.pid"
+OCF_RESKEY_socket_default=""
+OCF_RESKEY_additional_parameters_default=""
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+glusterd_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="glusterd" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+ </longdesc>
+ <shortdesc lang="en">Manages a Gluster server</shortdesc>
+ <parameters>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the glusterd executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">glusterd executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ <parameter name="pid">
+ <longdesc lang="en">
+ Path to the glusterd PID file.
+ </longdesc>
+ <shortdesc lang="en">PID file</shortdesc>
+ <content type="string" default="$OCF_RESKEY_pid_default"/>
+ </parameter>
+ <parameter name="socket">
+ <longdesc lang="en">
+ Path to the glusterd UNIX socket file. If unspecified,
+ glusterd will not listen on any socket.
+ </longdesc>
+ <shortdesc lang="en">Socket file</shortdesc>
+ <content type="string"/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+glusterd_start() {
+ local glusterd_options
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if glusterd_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ glusterd_options="-p $OCF_RESKEY_pid"
+ if [ -n "$OCF_RESKEY_socket" ]; then
+ glusterd_options="$glusterd_options -S $OCF_RESKEY_socket"
+ fi
+ if [ -n "$OCF_RESKEY_additional_parameters" ]; then
+ glusterd_options="$glusterd_options $OCF_RESKEY_additional_parameters"
+ fi
+
+ ocf_run $OCF_RESKEY_binary $glusterd_options || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! glusterd_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+glusterd_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ glusterd_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here: send SIGTERM to the PID read
+ # from the PID file, and exit with the $OCF_ERR_GENERIC error code
+ # (note the "$": a bare word is not a valid exit status) on failure
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s TERM $pid || exit $OCF_ERR_GENERIC
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while glusterd_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+glusterd_monitor() {
+ local pid
+
+ [ -e $OCF_RESKEY_pid ] || return $OCF_NOT_RUNNING
+
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+
+ ocf_log debug "$OCF_RESKEY_binary running with PID $pid"
+ return $OCF_SUCCESS
+}
+
+glusterd_validate_all() {
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) glusterd_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) glusterd_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+glusterd_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) glusterd_start;;
+stop) glusterd_stop;;
+status|monitor) glusterd_monitor;;
+reload) ocf_log info "Reloading..."
+ glusterd_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) glusterd_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/ocf/volume.in b/extras/ocf/volume.in
new file mode 100755
index 000000000..72fd1213a
--- /dev/null
+++ b/extras/ocf/volume.in
@@ -0,0 +1,246 @@
+#!/bin/sh
+#
+# volume
+#
+# Description: Manages a GlusterFS volume as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+SHORTHOSTNAME=`hostname -s`
+#######################################################################
+
+OCF_RESKEY_binary_default="gluster"
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+
+volume_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="volume" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+Manages a GlusterFS volume and monitors its bricks. When a resource of
+this type is configured as a clone (as is commonly the case), then it
+must have clone ordering enabled.
+ </longdesc>
+ <shortdesc lang="en">Manages a GlusterFS volume</shortdesc>
+ <parameters>
+ <parameter name="volname" required="1">
+ <longdesc lang="en">
+ The name of the volume to manage.
+ </longdesc>
+ <shortdesc lang="en">volume name</shortdesc>
+ <content type="string"/>
+ </parameter>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the gluster executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">gluster executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+volume_getdir() {
+ local voldir
+ voldir="@sysconfdir@/glusterd/vols/${OCF_RESKEY_volname}"
+
+ [ -d ${voldir} ] || return 1
+
+ echo "${voldir}"
+ return 0
+}
+
+volume_getbricks() {
+ local infofile
+ local voldir
+ voldir=`volume_getdir`
+ infofile="${voldir}/info"
+
+ [ -e ${infofile} ] || return 1
+
+ echo "`sed -n -e "s/^brick-.\+=${SHORTHOSTNAME}://p" < ${infofile}`"
+ return 0
+}
+
+volume_getpids() {
+ local bricks
+ local piddir
+ local pidfile
+ local infofile
+ local voldir
+
+ voldir=`volume_getdir`
+ bricks=`volume_getbricks`
+ piddir="${voldir}/run"
+
+ for brick in ${bricks}; do
+ pidfile="${piddir}/${SHORTHOSTNAME}${brick}.pid"
+ [ -e $pidfile ] || return 1
+ cat $pidfile
+ done
+
+ return 0
+}
+
+volume_start() {
+ local volume_options
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if volume_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here
+ ocf_run "$OCF_RESKEY_binary" \
+ volume start "$OCF_RESKEY_volname" force || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! volume_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+volume_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ volume_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ pids=`volume_getpids`
+ for pid in $pids; do
+ ocf_run kill -s TERM $pid
+ done
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while volume_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+volume_monitor() {
+ local pid
+
+ pids=`volume_getpids` || return $OCF_NOT_RUNNING
+
+ for pid in $pids; do
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+ done
+
+ ocf_log debug "Local bricks for volume ${OCF_RESKEY_volname} running with PIDs $pids"
+ return $OCF_SUCCESS
+}
+
+volume_validate_all() {
+ # Test for configuration errors first
+ if [ -z "${OCF_RESKEY_volname}" ]; then
+ ocf_log err 'Missing required parameter "volname"'
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) volume_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) volume_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+volume_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) volume_start;;
+stop) volume_stop;;
+status|monitor) volume_monitor;;
+reload) ocf_log info "Reloading..."
+ volume_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) volume_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/profiler/glusterfs-profiler b/extras/profiler/glusterfs-profiler
index 042eadcf2..65d445864 100755
--- a/extras/profiler/glusterfs-profiler
+++ b/extras/profiler/glusterfs-profiler
@@ -1,22 +1,15 @@
#!/usr/bin/env python
-# texttable - module for creating simple ASCII tables
-# Copyright (C) 2003-2010 Gerome Fournier <jefke(at)free.fr>
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# texttable - module for creating simple ASCII tables
# Incorporated from texttable.py downloaded from
# http://jefke.free.fr/stuff/python/texttable/texttable-0.7.0.tar.gz
diff --git a/extras/prot_filter.py b/extras/prot_filter.py
new file mode 100755
index 000000000..7dccacf15
--- /dev/null
+++ b/extras/prot_filter.py
@@ -0,0 +1,144 @@
+#!/usr/bin/python
+
+"""
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
+
+"""
+ INSTRUCTIONS
+ Put this in /usr/lib64/glusterfs/$version/filter to have it run automatically,
+ or else you'll have to run it by hand every time you change the volume
+ configuration. Give it a list of volume names on which to enable the
+ protection functionality; it will deliberately ignore client volfiles for
+ other volumes, and all server volfiles. It *will* include internal client
+ volfiles such as those used for NFS or rebalance/self-heal; this is a
+ deliberate choice so that it will catch deletions from those sources as well.
+"""
+
+volume_list = [ "jdtest" ]
+
+import copy
+import string
+import sys
+import types
+
+class Translator:
+ def __init__ (self, name):
+ self.name = name
+ self.xl_type = ""
+ self.opts = {}
+ self.subvols = []
+ self.dumped = False
+ def __repr__ (self):
+ return "<Translator %s>" % self.name
+
+def load (path):
+ # If it's a string, open it; otherwise, assume it's already a
+ # file-like object (most notably from urllib*).
+ if type(path) in types.StringTypes:
+ fp = file(path,"r")
+ else:
+ fp = path
+ all_xlators = {}
+ xlator = None
+ last_xlator = None
+ while True:
+ text = fp.readline()
+ if text == "":
+ break
+ text = text.split()
+ if not len(text):
+ continue
+ if text[0] == "volume":
+ if xlator:
+ raise RuntimeError, "nested volume definition"
+ xlator = Translator(text[1])
+ continue
+ if not xlator:
+ raise RuntimeError, "text outside volume definition"
+ if text[0] == "type":
+ xlator.xl_type = text[1]
+ continue
+ if text[0] == "option":
+ xlator.opts[text[1]] = string.join(text[2:])
+ continue
+ if text[0] == "subvolumes":
+ for sv in text[1:]:
+ xlator.subvols.append(all_xlators[sv])
+ continue
+ if text[0] == "end-volume":
+ all_xlators[xlator.name] = xlator
+ last_xlator = xlator
+ xlator = None
+ continue
+ raise RuntimeError, "unrecognized keyword %s" % text[0]
+ if xlator:
+ raise RuntimeError, "unclosed volume definition"
+ return all_xlators, last_xlator
+
+def generate (graph, last, stream=sys.stdout):
+ for sv in last.subvols:
+ if not sv.dumped:
+ generate(graph,sv,stream)
+ print >> stream, ""
+ sv.dumped = True
+ print >> stream, "volume %s" % last.name
+ print >> stream, " type %s" % last.xl_type
+ for k, v in last.opts.iteritems():
+ print >> stream, " option %s %s" % (k, v)
+ if last.subvols:
+ print >> stream, " subvolumes %s" % string.join(
+ [ sv.name for sv in last.subvols ])
+ print >> stream, "end-volume"
+
+def push_filter (graph, old_xl, filt_type, opts={}):
+ new_type = "-" + filt_type.split("/")[1]
+ old_type = "-" + old_xl.xl_type.split("/")[1]
+ pos = old_xl.name.find(old_type)
+ if pos >= 0:
+ new_name = old_xl.name
+ old_name = new_name[:pos] + new_type + new_name[len(old_type)+pos:]
+ else:
+ new_name = old_xl.name + old_type
+ old_name = old_xl.name + new_type
+ new_xl = Translator(new_name)
+ new_xl.xl_type = old_xl.xl_type
+ new_xl.opts = old_xl.opts
+ new_xl.subvols = old_xl.subvols
+ graph[new_xl.name] = new_xl
+ old_xl.name = old_name
+ old_xl.xl_type = filt_type
+ old_xl.opts = opts
+ old_xl.subvols = [new_xl]
+ graph[old_xl.name] = old_xl
+
+if __name__ == "__main__":
+ path = sys.argv[1]
+ # Allow an override for debugging.
+ for extra in sys.argv[2:]:
+ volume_list.append(extra)
+ graph, last = load(path)
+ for v in volume_list:
+ if graph.has_key(v):
+ break
+ else:
+ print "No configured volumes found - aborting."
+ sys.exit(0)
+ for v in graph.values():
+ if v.xl_type == "cluster/distribute":
+ push_filter(graph,v,"features/prot_dht")
+ elif v.xl_type == "protocol/client":
+ push_filter(graph,v,"features/prot_client")
+ # We push debug/trace so that every fop gets a real frame, because DHT
+ # gets confused if STACK_WIND_TAIL causes certain fops to be invoked
+ # from anything other than a direct child.
+ for v in graph.values():
+ if v.xl_type == "features/prot_client":
+ push_filter(graph,v,"debug/trace")
+ generate(graph,last,stream=open(path,"w"))
diff --git a/extras/rebalance.py b/extras/rebalance.py
new file mode 100755
index 000000000..80c614c5d
--- /dev/null
+++ b/extras/rebalance.py
@@ -0,0 +1,299 @@
+#!/usr/bin/python
+
+import atexit
+import copy
+import optparse
+import os
+import pipes
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
+import volfilter
+
+# It's just more convenient to have named fields.
+class Brick:
+ def __init__ (self, path, name):
+ self.path = path
+ self.sv_name = name
+ self.size = 0
+ self.curr_size = 0
+ self.good_size = 0
+ def set_size (self, size):
+ self.size = size
+ def set_range (self, rs, re):
+ self.r_start = rs
+ self.r_end = re
+ self.curr_size = self.r_end - self.r_start + 1
+ def __repr__ (self):
+ value = self.path[:]
+ value += "(%d," % self.size
+ if self.curr_size:
+ value += "0x%x,0x%x)" % (self.r_start, self.r_end)
+ else:
+ value += "-)"
+ return value
+
+def get_bricks (host, vol):
+ t = pipes.Template()
+ t.prepend("gluster --remote-host=%s system getspec %s"%(host,vol),".-")
+ return t.open(None,"r")
+
+def generate_stanza (vf, all_xlators, cur_subvol):
+ sv_list = []
+ for sv in cur_subvol.subvols:
+ generate_stanza(vf,all_xlators,sv)
+ sv_list.append(sv.name)
+ vf.write("volume %s\n"%cur_subvol.name)
+ vf.write(" type %s\n"%cur_subvol.type)
+ for kvpair in cur_subvol.opts.iteritems():
+ vf.write(" option %s %s\n"%kvpair)
+ if sv_list:
+ vf.write(" subvolumes %s\n"%string.join(sv_list))
+ vf.write("end-volume\n\n")
+
+
+def mount_brick (localpath, all_xlators, dht_subvol):
+
+ # Generate a volfile.
+ vf_name = localpath + ".vol"
+ vf = open(vf_name,"w")
+ generate_stanza(vf,all_xlators,dht_subvol)
+ vf.flush()
+ vf.close()
+
+ # Create a brick directory and mount the brick there.
+ os.mkdir(localpath)
+ subprocess.call(["glusterfs","-f",vf_name,localpath])
+
+# We use the command-line tools because there's no getxattr support in the
+# Python standard library (which is ridiculous IMO). Adding the xattr package
+# from PyPI would create a new and difficult dependency because the bits to
+# satisfy it don't seem to exist in Fedora. We already expect the command-line
+# tools to be there, so it's safer just to rely on them.
+#
+# We might have to revisit this if we get as far as actually issuing millions
+# of setxattr requests. Even then, it might be better to do that part with a C
+# program which has only a build-time dependency.
+def get_range (brick):
+ t = pipes.Template()
+ cmd = "getfattr -e hex -n trusted.glusterfs.dht %s 2> /dev/null"
+ t.prepend(cmd%brick,".-")
+ t.append("grep ^trusted.glusterfs.dht=","--")
+ f = t.open(None,"r")
+ try:
+ value = f.readline().rstrip().split('=')[1][2:]
+ except:
+ print "could not get layout for %s (might be OK)" % brick
+ return None
+ v_start = int("0x"+value[16:24],16)
+ v_end = int("0x"+value[24:32],16)
+ return (v_start, v_end)
+
+def calc_sizes (bricks, total):
+ leftover = 1 << 32
+ for b in bricks:
+ if b.size:
+ b.good_size = (b.size << 32) / total
+ leftover -= b.good_size
+ else:
+ b.good_size = 0
+ if leftover:
+ # Add the leftover to an old brick if we can.
+ for b in bricks:
+ if b.good_size:
+ b.good_size += leftover
+ break
+ else:
+ # Fine, just add it wherever.
+ bricks[0].good_size += leftover
+
+# Normalization means (a) sorting the bricks by r_start and (b) ensuring that
+# there are no gaps.
+def normalize (in_bricks):
+ out_bricks = []
+ curr_hash = 0
+ used = 0
+ while curr_hash < (1<<32):
+ curr_best = None
+ for b in in_bricks:
+ if b.r_start == curr_hash:
+ used += 1
+ out_bricks.append(b)
+ in_bricks.remove(b)
+ curr_hash = b.r_end + 1
+ break
+ else:
+ print "gap found at 0x%08x" % curr_hash
+ sys.exit(1)
+ return out_bricks + in_bricks, used
+
+def get_score (bricks):
+ score = 0
+ curr_hash = 0
+ for b in bricks:
+ if not b.curr_size:
+ curr_hash += b.good_size
+ continue
+ new_start = curr_hash
+ curr_hash += b.good_size
+ new_end = curr_hash - 1
+ if new_start > b.r_start:
+ max_start = new_start
+ else:
+ max_start = b.r_start
+ if new_end < b.r_end:
+ min_end = new_end
+ else:
+ min_end = b.r_end
+ if max_start <= min_end:
+ score += (min_end - max_start + 1)
+ return score
+
+if __name__ == "__main__":
+
+ my_usage = "%prog [options] server volume [directory]"
+ parser = optparse.OptionParser(usage=my_usage)
+ parser.add_option("-f", "--free-space", dest="free_space",
+ default=False, action="store_true",
+ help="use free space instead of total space")
+ parser.add_option("-l", "--leave-mounted", dest="leave_mounted",
+ default=False, action="store_true",
+ help="leave subvolumes mounted")
+ parser.add_option("-v", "--verbose", dest="verbose",
+ default=False, action="store_true",
+ help="verbose output")
+ options, args = parser.parse_args()
+
+ if len(args) == 3:
+ fix_dir = args[2]
+ else:
+ if len(args) != 2:
+ parser.print_help()
+ sys.exit(1)
+ fix_dir = None
+ hostname, volname = args[:2]
+
+ # Make sure stuff gets cleaned up, even if there are exceptions.
+ orig_dir = os.getcwd()
+ work_dir = tempfile.mkdtemp()
+ bricks = []
+ def cleanup_workdir ():
+ os.chdir(orig_dir)
+ if options.verbose:
+ print "Cleaning up %s" % work_dir
+ for b in bricks:
+ subprocess.call(["umount",b.path])
+ shutil.rmtree(work_dir)
+ if not options.leave_mounted:
+ atexit.register(cleanup_workdir)
+ os.chdir(work_dir)
+
+ # Mount each brick individually, so we can issue brick-specific calls.
+ if options.verbose:
+ print "Mounting subvolumes..."
+ index = 0
+ volfile_pipe = get_bricks(hostname,volname)
+ all_xlators, last_xlator = volfilter.load(volfile_pipe)
+ for dht_vol in all_xlators.itervalues():
+ if dht_vol.type == "cluster/distribute":
+ break
+ else:
+ print "no DHT volume found"
+ sys.exit(1)
+ for sv in dht_vol.subvols:
+ #print "found subvol %s" % sv.name
+ lpath = "%s/brick%s" % (work_dir, index)
+ index += 1
+ mount_brick(lpath,all_xlators,sv)
+ bricks.append(Brick(lpath,sv.name))
+ if index == 0:
+ print "no bricks"
+ sys.exit(1)
+
+ # Collect all of the sizes.
+ if options.verbose:
+ print "Collecting information..."
+ total = 0
+ for b in bricks:
+ info = os.statvfs(b.path)
+ # We want a standard unit even if different bricks use
+ # different block sizes. The size is chosen to avoid overflows
+ # for very large bricks with very small block sizes, but also
+ # accommodate filesystems which use very large block sizes to
+ # cheat on benchmarks.
+ blocksper100mb = 104857600 / info[0]
+ if options.free_space:
+ size = info[3] / blocksper100mb
+ else:
+ size = info[2] / blocksper100mb
+ if size <= 0:
+ print "brick %s has invalid size %d" % (b.path, size)
+ sys.exit(1)
+ b.set_size(size)
+ total += size
+
+ # Collect all of the layout information.
+ for b in bricks:
+ hash_range = get_range(b.path)
+ if hash_range is not None:
+ rs, re = hash_range
+ if rs > re:
+ print "%s has backwards hash range" % b.path
+ sys.exit(1)
+ b.set_range(hash_range[0],hash_range[1])
+
+ if options.verbose:
+ print "Calculating new layouts..."
+ calc_sizes(bricks,total)
+ bricks, used = normalize(bricks)
+
+ # We can't afford O(n!) here, but O(n^2) should be OK and the result
+ # should be almost as good.
+ while used < len(bricks):
+ best_place = used
+ best_score = get_score(bricks)
+ for i in xrange(used):
+ new_bricks = bricks[:]
+ del new_bricks[used]
+ new_bricks.insert(i,bricks[used])
+ new_score = get_score(new_bricks)
+ if new_score > best_score:
+ best_place = i
+ best_score = new_score
+ if best_place != used:
+ nb = bricks[used]
+ del bricks[used]
+ bricks.insert(best_place,nb)
+ used += 1
+
+ # Finalize whatever we decided on.
+ curr_hash = 0
+ for b in bricks:
+ b.r_start = curr_hash
+ curr_hash += b.good_size
+ b.r_end = curr_hash - 1
+
+ print "Here are the xattr values for your size-weighted layout:"
+ for b in bricks:
+ print " %s: 0x0000000200000000%08x%08x" % (
+ b.sv_name, b.r_start, b.r_end)
+
+ if fix_dir:
+ if options.verbose:
+ print "Fixing layout for %s" % fix_dir
+ for b in bricks:
+ value = "0x0000000200000000%08x%08x" % (
+ b.r_start, b.r_end)
+ path = "%s/%s" % (b.path, fix_dir)
+ cmd = "setfattr -n trusted.glusterfs.dht -v %s %s" % (
+ value, path)
+ print cmd
+
+ if options.leave_mounted:
+ print "The following subvolumes are still mounted:"
+ for b in bricks:
+ print "%s on %s" % (b.sv_name, b.path)
+ print "Don't forget to clean up when you're done."
+
diff --git a/extras/stripe-merge.c b/extras/stripe-merge.c
index 3f8e4b124..74bd47e30 100644
--- a/extras/stripe-merge.c
+++ b/extras/stripe-merge.c
@@ -1,48 +1,494 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ * stripe-merge.c
+ *
+ * This program recovers an original file based on the striped files stored on
+ * the individual bricks of a striped volume. The file format and stripe
+ * geometry are validated through the extended attributes stored in the file.
+ *
+ * TODO: Support optional xattr recovery (i.e., user xattrs). Perhaps provide a
+ * command-line flag to toggle this behavior.
+ */
+
#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <string.h>
+#include <attr/xattr.h>
+#include <fnmatch.h>
-int
-main (int argc, char *argv[])
+#define ATTRNAME_STRIPE_INDEX "trusted.*.stripe-index"
+#define ATTRNAME_STRIPE_COUNT "trusted.*.stripe-count"
+#define ATTRNAME_STRIPE_SIZE "trusted.*.stripe-size"
+#define ATTRNAME_STRIPE_COALESCE "trusted.*.stripe-coalesce"
+
+#define INVALID_FD -1
+#define INVALID_MODE UINT32_MAX
+
+/*
+ * Stripe geometry shared by all source files, plus one file descriptor slot
+ * per stripe index.
+ */
+struct file_stripe_info {
+	int stripe_count;	/* number of stripes, i.e. entries in fd[] */
+	int stripe_size;	/* bytes per stripe chunk; 0 until first set */
+	int coalesce;		/* coalesce flag; INVALID_FD until first set */
+	mode_t mode;		/* st_mode of sources; INVALID_MODE until set */
+	int fd[];		/* source fds by stripe index, or INVALID_FD */
+};
+
+static int close_files(struct file_stripe_info *);
+
+/*
+ * Allocate a file_stripe_info with room for 'count' file descriptors. Every
+ * descriptor slot, the mode and the coalesce flag start out as "not set"
+ * sentinels; the remaining fields are zeroed by calloc().
+ *
+ * Returns NULL if count is not positive or the allocation fails. The caller
+ * owns the result and releases it with free().
+ */
+static struct
+file_stripe_info *alloc_file_stripe_info(int count)
{
- int fds[argc-1];
- char buf[argc-1][4096];
int i;
- int max_ret, ret;
+ struct file_stripe_info *finfo;
- if (argc < 2) {
- printf ("Usage: %s file1 file2 ... >file\n", argv[0]);
- return 1;
+ /*
+ * A non-positive count would leave no fd slots to index, or wrap the
+ * size computation below, so refuse it up front.
+ */
+ if (count <= 0) {
+ fprintf(stderr, "ERROR: invalid stripe count: %d\n", count);
+ return NULL;
+ }
+
+ finfo = calloc(1, sizeof(struct file_stripe_info) +
+ (sizeof(int) * count));
+ if (!finfo)
+ return NULL;
+
+ for (i = 0; i < count; i++)
+ finfo->fd[i] = INVALID_FD;
+
+ finfo->mode = INVALID_MODE;
+ finfo->coalesce = INVALID_FD;
+
+ return finfo;
+}
+
+/*
+ * Search the extended attribute names of 'path' for entries matching the
+ * fnmatch() pattern 'pattern'.
+ *
+ * Returns the number of matching names (possibly 0), or a negative value when
+ * an argument is NULL, listxattr() fails, or fnmatch() reports an error. If
+ * anything matched, *attrname receives a strdup()'d copy of the first match
+ * (to be freed by the caller); otherwise *attrname is left untouched.
+ */
+static int
+get_stripe_attr_name(const char *path, const char *pattern, char **attrname)
+{
+ char attrbuf[4096];
+ char *ptr, *match = NULL;
+ int len, r, match_count = 0;
+
+ if (!path || !pattern || !attrname)
+ return -1;
+
+ len = listxattr(path, attrbuf, sizeof(attrbuf));
+ if (len < 0)
+ return len;
+
+ /*
+ * A file without attributes yields len == 0 and an unwritten attrbuf,
+ * so only walk the name list when it is non-empty.
+ */
+ ptr = (len > 0) ? attrbuf : NULL;
+ while (ptr) {
+ r = fnmatch(pattern, ptr, 0);
+ if (!r) {
+ if (!match)
+ match = ptr;
+ match_count++;
+ } else if (r != FNM_NOMATCH) {
+ return -1;
+ }
+
+ len -= strlen(ptr) + 1;
+ if (len > 0)
+ ptr += strlen(ptr) + 1;
+ else
+ ptr = NULL;
}
- for (i=0; i<argc-1; i++) {
- fds[i] = open (argv[i+1], O_RDONLY);
- if (fds[i] == -1) {
- perror (argv[i+1]);
- return 1;
+ if (match)
+ *attrname = strdup(match);
+
+ return match_count;
+}
+
+/*
+ * Read the extended attribute 'attr' of 'path' and parse its value as a
+ * decimal integer into *val.
+ *
+ * Returns 0 on success or a negative value if an argument is NULL or
+ * getxattr() fails. The value is parsed with atoi(), so contents that do not
+ * start with a number yield 0.
+ */
+static int
+get_stripe_attr_val(const char *path, const char *attr, int *val)
+{
+	char attrbuf[4096];
+	int len;
+
+	if (!path || !attr || !val)
+		return -1;
+
+	/*
+	 * getxattr() hands back raw bytes without a terminator; hold back the
+	 * last byte so the value can be NUL-terminated before it is parsed.
+	 */
+	len = getxattr(path, attr, attrbuf, sizeof(attrbuf) - 1);
+	if (len < 0)
+		return len;
+
+	attrbuf[len] = '\0';
+	*val = atoi(attrbuf);
+
+	return 0;
+}
+
+/*
+ * Look up an integer-valued attribute of 'path' by name pattern.
+ *
+ * If *buf is NULL, the attribute names of 'path' are searched for 'pattern'
+ * and, when a name matches, *buf is set to an allocated copy of that name. If
+ * *buf is already set, it is used as the attribute name and the pattern
+ * search is skipped.
+ *
+ * Returns the number of matching attributes: 1 on success (with *val set),
+ * 0 if nothing matched, more than 1 if the pattern was ambiguous (in which
+ * case *buf is freed and reset to NULL), or a negative value on error.
+ */
+static int
+get_attr(const char *path, const char *pattern, char **buf, int *val)
+{
+ int count = 1;
+
+ if (!buf)
+ return -1;
+
+ if (!*buf) {
+ count = get_stripe_attr_name(path, pattern, buf);
+ if (count > 1) {
+ /* the pattern matched more than one attribute name */
+ fprintf(stderr, "ERROR: duplicate attributes found "
+ "matching pattern: %s\n", pattern);
+ free(*buf);
+ *buf = NULL;
+ return count;
+ } else if (count < 1) {
+ return count;
}
}
- max_ret = 0;
+ /* exactly one attribute name is known at this point; fetch its value */
+ if (get_stripe_attr_val(path, *buf, val) < 0)
+ return -1;
+
+ return count;
+}
+
+/*
+ * validate_and_open_files()
+ *
+ * Open the provided source files and validate their extended attributes.
+ * Verify that the geometric attributes are consistent across all of the files
+ * and print a warning if any files are missing. We proceed without error in
+ * the latter case to support partial recovery.
+ *
+ * Each file is opened read-only into the fd slot selected by its stripe
+ * index. Returns the populated info structure, which the caller must hand to
+ * close_files() and free(), or NULL on any validation or I/O failure.
+ */
+static struct
+file_stripe_info *validate_and_open_files(char *paths[], int count)
+{
+	int i, val, tmp;
+	struct stat sbuf;
+	char *stripe_count_attr = NULL;
+	char *stripe_size_attr = NULL;
+	char *stripe_index_attr = NULL;
+	char *stripe_coalesce_attr = NULL;
+	struct file_stripe_info *finfo = NULL;
+
+	for (i = 0; i < count; i++) {
+		if (!paths[i])
+			goto err;
+
+		/*
+		 * Check the stripe count first so we can allocate the info
+		 * struct with the appropriate number of fds.
+		 */
+		if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT,
+			     &stripe_count_attr, &val) != 1) {
+			fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+				paths[i], ATTRNAME_STRIPE_COUNT);
+			goto err;
+		}
+		if (!finfo) {
+			finfo = alloc_file_stripe_info(val);
+			if (!finfo)
+				goto err;
+
+			if (val != count)
+				fprintf(stderr, "WARNING: %s: stripe-count "
+					"(%d) != file count (%d). Result may "
+					"be incomplete.\n", paths[i], val,
+					count);
+
+			finfo->stripe_count = val;
+		} else if (val != finfo->stripe_count) {
+			fprintf(stderr, "ERROR %s: invalid stripe count: %d "
+				"(expected %d)\n", paths[i], val,
+				finfo->stripe_count);
+			goto err;
+		}
+
+		/*
+		 * Get and validate the chunk size.
+		 */
+		if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr,
+			     &val) != 1) {
+			fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+				paths[i], ATTRNAME_STRIPE_SIZE);
+			goto err;
+		}
+
+		if (!finfo->stripe_size) {
+			finfo->stripe_size = val;
+		} else if (val != finfo->stripe_size) {
+			fprintf(stderr, "ERROR: %s: invalid stripe size: %d "
+				"(expected %d)\n", paths[i], val,
+				finfo->stripe_size);
+			goto err;
+		}
+
+		/*
+		 * stripe-coalesce is a backward compatible attribute. If the
+		 * attribute does not exist, assume a value of zero for the
+		 * traditional stripe format.
+		 */
+		tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE,
+			       &stripe_coalesce_attr, &val);
+		if (!tmp) {
+			val = 0;
+		} else if (tmp != 1) {
+			fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+				paths[i], ATTRNAME_STRIPE_COALESCE);
+			goto err;
+		}
+
+		if (finfo->coalesce == INVALID_FD) {
+			finfo->coalesce = val;
+		} else if (val != finfo->coalesce) {
+			fprintf(stderr, "ERROR: %s: invalid coalesce flag\n",
+				paths[i]);
+			goto err;
+		}
+
+		/*
+		 * Get/validate the stripe index and open the file in the
+		 * appropriate fd slot.
+		 */
+		if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX,
+			     &stripe_index_attr, &val) != 1) {
+			fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
+				paths[i], ATTRNAME_STRIPE_INDEX);
+			goto err;
+		}
+
+		/*
+		 * The index selects a slot in finfo->fd[], so reject anything
+		 * outside of [0, stripe_count) before it is used.
+		 */
+		if (val < 0 || val >= finfo->stripe_count) {
+			fprintf(stderr, "ERROR: %s: invalid stripe index: %d "
+				"(stripe count %d)\n", paths[i], val,
+				finfo->stripe_count);
+			goto err;
+		}
+		if (finfo->fd[val] != INVALID_FD) {
+			fprintf(stderr, "ERROR: %s: duplicate stripe index: "
+				"%d\n", paths[i], val);
+			goto err;
+		}
+
+		finfo->fd[val] = open(paths[i], O_RDONLY);
+		if (finfo->fd[val] < 0) {
+			fprintf(stderr, "ERROR: %s: %s\n", paths[i],
+				strerror(errno));
+			goto err;
+		}
+
+		/*
+		 * Get the creation mode for the file.
+		 */
+		if (fstat(finfo->fd[val], &sbuf) < 0) {
+			fprintf(stderr, "ERROR: %s: %s\n", paths[i],
+				strerror(errno));
+			goto err;
+		}
+		if (finfo->mode == INVALID_MODE) {
+			finfo->mode = sbuf.st_mode;
+		} else if (sbuf.st_mode != finfo->mode) {
+			fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]);
+			goto err;
+		}
+	}
+
+	free(stripe_count_attr);
+	free(stripe_size_attr);
+	free(stripe_index_attr);
+	free(stripe_coalesce_attr);
+
+	return finfo;
+err:
+
+	free(stripe_count_attr);
+	free(stripe_size_attr);
+	free(stripe_index_attr);
+	free(stripe_coalesce_attr);
+
+	if (finfo) {
+		close_files(finfo);
+		free(finfo);
+	}
+
+	return NULL;
+}
+
+/*
+ * Close every open source file descriptor in finfo and mark its slot as
+ * INVALID_FD.
+ *
+ * All descriptors are closed even if one of the close() calls fails. Returns
+ * 0 when nothing failed (including when no file was open), or -1 if finfo is
+ * NULL or any close() failed.
+ */
+static int
+close_files(struct file_stripe_info *finfo)
+{
+	int i, ret = 0;
+
+	if (!finfo)
+		return -1;
+
+	for (i = 0; i < finfo->stripe_count; i++) {
+		if (finfo->fd[i] == INVALID_FD)
+			continue;
+
+		/* remember the failure but keep releasing the other fds */
+		if (close(finfo->fd[i]) < 0)
+			ret = -1;
+		finfo->fd[i] = INVALID_FD;
+	}
+
+	return ret;
+}
+
+/*
+ * Generate the original file using files striped in the coalesced format.
+ * Data in the striped files is stored at a coalesced offset based on the
+ * stripe number.
+ *
+ * Walk through the finfo fds (which are already ordered) and iteratively copy
+ * stripe_size bytes from the source files to the target file. If a source
+ * file is missing, seek past the associated stripe_size bytes in the target
+ * file. Copying stops at the first source file that reports end-of-file.
+ *
+ * Returns 0 on success or a negative value if allocation, read, write or
+ * seek fails.
+ */
+static int
+generate_file_coalesce(int target, struct file_stripe_info *finfo)
+{
+	char *buf;
+	int ret = 0;
+	int r, w, done, i;
+
+	buf = malloc(finfo->stripe_size);
+	if (!buf)
+		return -1;
+
+	i = 0;
+	while (1) {
+		if (finfo->fd[i] == INVALID_FD) {
+			/* skip over the range the missing stripe would fill */
+			if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0) {
+				ret = -1;
+				break;
+			}
+
+			i = (i + 1) % finfo->stripe_count;
+			continue;
+		}
+
+		r = read(finfo->fd[i], buf, finfo->stripe_size);
+		if (r < 0) {
+			ret = r;
+			break;
+		}
+		if (!r)
+			break;
+
+		/* write() may accept fewer bytes than asked; push them all */
+		for (done = 0; done < r; done += w) {
+			w = write(target, buf + done, r - done);
+			if (w <= 0) {
+				ret = -1;
+				break;
+			}
+		}
+		if (ret < 0)
+			break;
+
+		i = (i + 1) % finfo->stripe_count;
+	}
+
+	free(buf);
+	return ret;
+}
+
+/*
+ * Generate the original file using files striped with the traditional stripe
+ * format. Data in the striped files is stored at the equivalent offset from
+ * the source file.
+ *
+ * Each pass reads up to 4096 bytes from every fd slot into a zeroed buffer,
+ * ORs the buffers together byte by byte and writes the longest length read to
+ * the target. The loop ends when no source returns any data; reads that fail
+ * are ignored.
+ *
+ * Returns 0 on success or -1 if the data could not be written to the target.
+ */
+static int
+generate_file_traditional(int target, struct file_stripe_info *finfo)
+{
+ int i, j, max_ret, ret;
+ char buf[finfo->stripe_count][4096];
+
do {
char newbuf[4096] = {0, };
- int j;
max_ret = 0;
- for (i=0; i<argc-1; i++) {
- memset (buf[i], 0, 4096);
- ret = read (fds[i], buf[i], 4096);
+ for (i = 0; i < finfo->stripe_count; i++) {
+ memset(buf[i], 0, 4096);
+ ret = read(finfo->fd[i], buf[i], 4096);
if (ret > max_ret)
max_ret = ret;
}
- for (i=0; i<max_ret;i++)
- for (j=0; j<argc-1; j++)
+ for (i = 0; i < max_ret; i++)
+ for (j = 0; j < finfo->stripe_count; j++)
newbuf[i] |= buf[j][i];
- write (1, newbuf, max_ret);
+ /* a failed or short write would silently corrupt the result */
+ if (max_ret && write(target, newbuf, max_ret) != max_ret)
+ return -1;
} while (max_ret);
return 0;
}
+/*
+ * Rebuild the original file into 'target', picking the routine that matches
+ * the stripe format recorded in finfo->coalesce.
+ */
+static int
+generate_file(int target, struct file_stripe_info *finfo)
+{
+	return finfo->coalesce ? generate_file_coalesce(target, finfo)
+			       : generate_file_traditional(target, finfo);
+}
+
+/* Print the command-line synopsis for program 'name' to stderr. */
+static void
+usage(char *name)
+{
+	static const char synopsis[] =
+		"Usage: %s [-o <outputfile>] <inputfile1> <inputfile2> ...\n";
+
+	fprintf(stderr, synopsis, name);
+}
+
+/*
+ * Parse the command line, validate and open the stripe source files and
+ * write the reconstructed file to the path given with -o.
+ *
+ * Returns 0 on success and -1 on a usage error or any failure, including a
+ * failed fsync() or close() of the output file.
+ */
+int
+main(int argc, char *argv[])
+{
+	int file_count, opt;
+	char *opath = NULL;
+	int targetfd = INVALID_FD;
+	int ret = -1;
+	struct file_stripe_info *finfo = NULL;
+
+	while ((opt = getopt(argc, argv, "o:")) != -1) {
+		switch (opt) {
+		case 'o':
+			opath = optarg;
+			break;
+		default:
+			usage(argv[0]);
+			return -1;
+		}
+	}
+
+	file_count = argc - optind;
+
+	if (!opath || !file_count) {
+		usage(argv[0]);
+		return -1;
+	}
+
+	finfo = validate_and_open_files(&argv[optind], file_count);
+	if (!finfo)
+		goto out;
+
+	targetfd = open(opath, O_RDWR|O_CREAT, finfo->mode);
+	if (targetfd < 0) {
+		fprintf(stderr, "ERROR: %s: %s\n", opath, strerror(errno));
+		goto out;
+	}
+
+	if (generate_file(targetfd, finfo) < 0) {
+		fprintf(stderr, "ERROR: failed to generate %s\n", opath);
+		goto out;
+	}
+
+	ret = 0;
+	if (fsync(targetfd) < 0) {
+		fprintf(stderr, "ERROR: %s\n", strerror(errno));
+		ret = -1;
+	}
+
+out:
+	/* the target has to be closed on the failure paths as well */
+	if (targetfd >= 0 && close(targetfd) < 0) {
+		fprintf(stderr, "ERROR: %s\n", strerror(errno));
+		ret = -1;
+	}
+
+	if (finfo) {
+		close_files(finfo);
+		free(finfo);
+	}
+
+	return ret;
+}
+
diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am
new file mode 100644
index 000000000..3fc656b82
--- /dev/null
+++ b/extras/systemd/Makefile.am
@@ -0,0 +1,11 @@
+
+CLEANFILES =
+
+SYSTEMD_DIR = @systemddir@
+
+install-exec-local:
+ @if [ -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(SYSTEMD_DIR); \
+ $(INSTALL_PROGRAM) glusterd.service $(DESTDIR)$(SYSTEMD_DIR)/; \
+ fi
+
diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
new file mode 100644
index 000000000..fc8d8c9a2
--- /dev/null
+++ b/extras/systemd/glusterd.service.in
@@ -0,0 +1,14 @@
+[Unit]
+Description=GlusterFS, a clustered file-system server
+After=network.target rpcbind.service
+Before=network-online.target
+
+[Service]
+Type=forking
+PIDFile=/run/glusterd.pid
+LimitNOFILE=65536
+ExecStart=@prefix@/sbin/glusterd -p /run/glusterd.pid
+KillMode=process
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/test/bug-920583.t b/extras/test/bug-920583.t
new file mode 100755
index 000000000..eedbb800a
--- /dev/null
+++ b/extras/test/bug-920583.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+mount -t glusterfs $H0:/$V0 $M0 -o "xlator-option=*dht.assert-no-child-down=yes,xlator-option=*dht.lookup-unhashed=yes"
+touch $M0/file1;
+
+new_lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+new_no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+EXPECT "1" expr $new_lookup_unhashed_count - $lookup_unhashed_count
+EXPECT "1" expr $new_no_child_down_count - $no_child_down_count
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/extras/test/gluster_commands.sh b/extras/test/gluster_commands.sh
index e1e396020..cb2a55fd5 100755
--- a/extras/test/gluster_commands.sh
+++ b/extras/test/gluster_commands.sh
@@ -1,21 +1,13 @@
#!/bin/bash
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
# This script tests the basics gluster cli commands.
@@ -54,13 +46,13 @@ gluster volume start vol
gluster volume info
sleep 1
mount -t glusterfs `hostname`:vol /mnt/client
-sleep 1
+sleep 1
df -h
echo "adding-brick......."
gluster volume add-brick vol `hostname`:/exports/exp2
gluster volume info
-sleep 1
+sleep 1
umount /mnt/client
mount -t glusterfs `hostname`:vol /mnt/client
df -h
@@ -102,7 +94,7 @@ sleep 1
echo "removing brick......."
gluster --mode=script volume remove-brick vol `hostname`:/exports/exp2
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
@@ -127,7 +119,7 @@ sleep 1
echo "starting replicate volume......"
gluster volume start mirror
gluster volume info
-sleep 1
+sleep 1
mount -t glusterfs `hostname`:mirror /mnt/client
sleep 1
df -h
@@ -136,7 +128,7 @@ sleep 1
echo "adding-brick......."
gluster volume add-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
@@ -178,14 +170,14 @@ sleep 1
echo "removeing-brick....."
gluster --mode=script volume remove-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
echo "stopping replicate volume....."
gluster --mode=script volume stop mirror
gluster volume info
-sleep 1
+sleep 1
umount /mnt/client
df -h
@@ -206,14 +198,14 @@ gluster volume start str
gluster volume info
sleep 1
mount -t glusterfs `hostname`:str /mnt/client
-sleep 1
+sleep 1
df -h
sleep 1
echo "adding brick...."
gluster volume add-brick str `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
diff --git a/extras/test/ld-preload-test/ld-preload-lib.c b/extras/test/ld-preload-test/ld-preload-lib.c
index e17305a4a..88afd14c3 100644
--- a/extras/test/ld-preload-test/ld-preload-lib.c
+++ b/extras/test/ld-preload-test/ld-preload-lib.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/* LD PRELOAD'able library
* A very simple library that intercepts booster supported system calls
* and prints a log message to stdout.
diff --git a/extras/test/ld-preload-test/ld-preload-test.c b/extras/test/ld-preload-test/ld-preload-test.c
index 55dd98805..78772f598 100644
--- a/extras/test/ld-preload-test/ld-preload-test.c
+++ b/extras/test/ld-preload-test/ld-preload-test.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/*
* LD PRELOAD Test Tool
*
diff --git a/extras/test/run.sh b/extras/test/run.sh
index 2440af237..4b3839cf6 100755
--- a/extras/test/run.sh
+++ b/extras/test/run.sh
@@ -1,21 +1,12 @@
#!/bin/sh
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
# Running gluster sanity test which starts glusterd and runs gluster commands, and exit at the first failure.
$PWD/gluster_commands.sh
diff --git a/extras/test/stop_glusterd.sh b/extras/test/stop_glusterd.sh
index a84689beb..a2db13f42 100755
--- a/extras/test/stop_glusterd.sh
+++ b/extras/test/stop_glusterd.sh
@@ -1,21 +1,12 @@
#!/bin/bash
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
#This script stops the glusterd running on the machine. Helpful for gluster sanity script
diff --git a/extras/test/test-ffop.c b/extras/test/test-ffop.c
index 2c47ab004..2d174d452 100644
--- a/extras/test/test-ffop.c
+++ b/extras/test/test-ffop.c
@@ -6,20 +6,82 @@
#include <attr/xattr.h>
#include <errno.h>
#include <string.h>
+#include <dirent.h>
+
+int fd_based_fops_1 (char *filename); //for fd based fops after unlink
+int fd_based_fops_2 (char *filename); //for fd based fops before unlink
+int dup_fd_based_fops (char *filename); // fops based on fd after dup
+int path_based_fops (char *filename); //for fops based on path
+int dir_based_fops (char *filename); // for fops which operate on directory
+int link_based_fops (char *filename); //for fops which operate in link files (symlinks)
+int test_open_modes (char *filename); // to test open syscall with open modes available.
+int generic_open_read_write (char *filename, int flag); // generic function which does open write and read.
+/*
+ * Run each group of file operation tests against a file name derived from
+ * argv[1] (or a default name) and report pass/fail per group. The return
+ * value is the result of the last group only.
+ */
int
main (int argc, char *argv[])
{
- int ret = -1;
- int fd = 0;
- char *filename = NULL;
- struct stat stbuf = {0,};
+ int ret = -1;
+ char filename[255] = {0,};
+ /*
+ * Bound the copy so a long argument cannot overflow filename; 32 bytes
+ * are held back for the suffixes appended with strcat() below.
+ */
if (argc > 1)
- filename = argv[1];
+ snprintf (filename, sizeof (filename) - 32, "%s", argv[1]);
+ else
+ strcpy(filename, "temp-xattr-test-file");
+
+ ret = fd_based_fops_1 (strcat(filename, "_1"));
+ if (ret < 0)
+ fprintf (stderr, "fd based file operation 1 failed\n");
+ else
+ fprintf (stdout, "fd based file operation 1 passed\n");
+
+ ret = fd_based_fops_2 (strcat(filename, "_2"));
+ if (ret < 0)
+ fprintf (stderr, "fd based file operation 2 failed\n");
+ else
+ fprintf (stdout, "fd based file operation 2 passed\n");
+
+ ret = dup_fd_based_fops (strcat (filename, "_3"));
+ if (ret < 0)
+ fprintf (stderr, "dup fd based file operation failed\n");
+ else
+ fprintf (stdout, "dup fd based file operation passed\n");
- if (!filename)
- filename = "temp-xattr-test-file";
+ ret = path_based_fops (strcat (filename, "_4"));
+ if (ret < 0)
+ fprintf (stderr, "path based file operation failed\n");
+ else
+ fprintf (stdout, "path based file operation passed\n");
+
+ ret = dir_based_fops (strcat (filename, "_5"));
+ if (ret < 0)
+ fprintf (stderr, "directory based file operation failed\n");
+ else
+ fprintf (stdout, "directory based file operation passed\n");
+
+ ret = link_based_fops (strcat (filename, "_5"));
+ if (ret < 0)
+ fprintf (stderr, "link based file operation failed\n");
+ else
+ fprintf (stdout, "link based file operation passed\n");
+
+ ret = test_open_modes (strcat (filename, "_5"));
+ if (ret < 0)
+ fprintf (stderr, "testing modes of 'open' call failed\n");
+ else
+ fprintf (stdout, "testing modes of 'open' call passed\n");
+
+out:
+ return ret;
+}
+
+int
+fd_based_fops_1 (char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ struct stat stbuf = {0,};
+ char wstr[50] = {0,};
+ char rstr[50] = {0,};
fd = open (filename, O_RDWR|O_CREAT);
if (fd < 0) {
@@ -34,6 +96,34 @@ main (int argc, char *argv[])
goto out;
}
+ strcpy (wstr, "This is my string\n");
+ ret = write (fd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "write failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = lseek (fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf (stderr, "lseek failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = read (fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf (stderr, "read failed: %s\n", strerror (errno));
+ goto out;
+ }
+
+ ret = memcmp (rstr, wstr, strlen (wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf (stderr, "read returning junk\n");
+ goto out;
+ }
+
ret = ftruncate (fd, 0);
if (ret < 0) {
fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
@@ -103,3 +193,678 @@ out:
return ret;
}
+
+
+/*
+ * Exercise fd based file operations (write, lseek, read, fstat, fchmod,
+ * fchown, fsync, fdatasync and the f*xattr calls) on a freshly created file
+ * while it is still linked. The file is unlinked before returning.
+ *
+ * Returns a negative value if any operation fails, otherwise the result of
+ * the final fremovexattr() call.
+ */
+int
+fd_based_fops_2 (char *filename)
+{
+        int          fd        = -1;
+        int          ret       = -1;
+        struct stat  stbuf     = {0,};
+        char         wstr[50]  = {0,};
+        char         rstr[50]  = {0,};
+
+        /* O_CREAT requires an explicit mode argument */
+        fd = open (filename, O_RDWR|O_CREAT, 0644);
+        if (fd < 0) {
+                fprintf (stderr, "open failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = ftruncate (fd, 0);
+        if (ret < 0) {
+                fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        strcpy (wstr, "This is my second string\n");
+        ret = write (fd, wstr, strlen (wstr));
+        if (ret < 0) {
+                ret = -1;
+                fprintf (stderr, "write failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        /* capture the lseek() result so the check below tests this call */
+        ret = lseek (fd, 0, SEEK_SET);
+        if (ret < 0) {
+                fprintf (stderr, "lseek failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = read (fd, rstr, strlen (wstr));
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "read failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = memcmp (rstr, wstr, strlen (wstr));
+        if (ret != 0) {
+                ret = -1;
+                fprintf (stderr, "read returning junk\n");
+                goto out;
+        }
+
+        ret = fstat (fd, &stbuf);
+        if (ret < 0) {
+                fprintf (stderr, "fstat failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fchmod (fd, 0640);
+        if (ret < 0) {
+                fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fchown (fd, 10001, 10001);
+        if (ret < 0) {
+                fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fsync (fd);
+        if (ret < 0) {
+                fprintf (stderr, "fsync failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
+        if (ret < 0) {
+                fprintf (stderr, "fsetxattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fdatasync (fd);
+        if (ret < 0) {
+                fprintf (stderr, "fdatasync failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = flistxattr (fd, NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "flistxattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "fgetxattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fremovexattr (fd, "trusted.xattr-test");
+        if (ret < 0) {
+                fprintf (stderr, "fremovexattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+out:
+        if (fd >= 0)
+                close (fd);
+        unlink (filename);
+
+        return ret;
+}
+
+/*
+ * Exercise path based file operations (truncate, stat, chmod, chown, the
+ * *xattr calls, access and rename) on a file created with creat(). Both the
+ * original and the renamed path are unlinked before returning.
+ *
+ * Returns a negative value if any operation fails, otherwise the result of
+ * the final rename() call.
+ */
+int
+path_based_fops (char *filename)
+{
+        int          ret              = -1;
+        int          fd               = -1;
+        struct stat  stbuf            = {0,};
+        char         newfilename[255] = {0,};
+
+        fd = creat (filename, 0644);
+        if (fd < 0) {
+                fprintf (stderr, "creat failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = truncate (filename, 0);
+        if (ret < 0) {
+                fprintf (stderr, "truncate failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = stat (filename, &stbuf);
+        if (ret < 0) {
+                fprintf (stderr, "stat failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = chmod (filename, 0640);
+        if (ret < 0) {
+                fprintf (stderr, "chmod failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = chown (filename, 10001, 10001);
+        if (ret < 0) {
+                fprintf (stderr, "chown failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = setxattr (filename, "trusted.xattr-test", "working", 8, 0);
+        if (ret < 0) {
+                fprintf (stderr, "setxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = listxattr (filename, NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "listxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = getxattr (filename, "trusted.xattr-test", NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "getxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = removexattr (filename, "trusted.xattr-test");
+        if (ret < 0) {
+                fprintf (stderr, "removexattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = access (filename, R_OK|W_OK);
+        if (ret < 0) {
+                fprintf (stderr, "access failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        /* bounded so a long filename cannot overflow newfilename */
+        snprintf (newfilename, sizeof (newfilename), "%s_new", filename);
+        ret = rename (filename, newfilename);
+        if (ret < 0) {
+                fprintf (stderr, "rename failed: %s\n", strerror (errno));
+                goto out;
+        }
+        unlink (newfilename);
+
+out:
+        /* fd is -1 when creat() failed; only close a real descriptor */
+        if (fd >= 0)
+                close (fd);
+
+        unlink (filename);
+        return ret;
+}
+
+/*
+ * Exercise fd based file operations through a descriptor obtained with
+ * dup(), after the descriptor returned by open() has been closed. The file
+ * is unlinked before returning.
+ *
+ * Returns 0 if every operation (including the final unlink) succeeded and a
+ * negative value otherwise.
+ */
+int
+dup_fd_based_fops (char *filename)
+{
+        int          fd        = -1;
+        int          newfd     = -1;
+        int          ret       = -1;
+        struct stat  stbuf     = {0,};
+        char         wstr[50]  = {0,};
+        char         rstr[50]  = {0,};
+
+        /* O_CREAT requires an explicit mode argument */
+        fd = open (filename, O_RDWR|O_CREAT, 0644);
+        if (fd < 0) {
+                fprintf (stderr, "open failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        newfd = dup (fd);
+        if (newfd < 0) {
+                ret = -1;
+                fprintf (stderr, "dup failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        close (fd);
+        fd = -1;
+
+        strcpy (wstr, "This is my string\n");
+        ret = write (newfd, wstr, strlen(wstr));
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "write failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = lseek (newfd, 0, SEEK_SET);
+        if (ret < 0) {
+                fprintf (stderr, "lseek failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = read (newfd, rstr, strlen(wstr));
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "read failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = memcmp (rstr, wstr, strlen (wstr));
+        if (ret != 0) {
+                ret = -1;
+                fprintf (stderr, "read returning junk\n");
+                goto out;
+        }
+
+        ret = ftruncate (newfd, 0);
+        if (ret < 0) {
+                fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fstat (newfd, &stbuf);
+        if (ret < 0) {
+                fprintf (stderr, "fstat failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fchmod (newfd, 0640);
+        if (ret < 0) {
+                fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fchown (newfd, 10001, 10001);
+        if (ret < 0) {
+                fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fsync (newfd);
+        if (ret < 0) {
+                fprintf (stderr, "fsync failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fsetxattr (newfd, "trusted.xattr-test", "working", 8, 0);
+        if (ret < 0) {
+                fprintf (stderr, "fsetxattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fdatasync (newfd);
+        if (ret < 0) {
+                fprintf (stderr, "fdatasync failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = flistxattr (newfd, NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "flistxattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fgetxattr (newfd, "trusted.xattr-test", NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "fgetxattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = fremovexattr (newfd, "trusted.xattr-test");
+        if (ret < 0) {
+                fprintf (stderr, "fremovexattr failed : %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = 0;
+out:
+        /* fd is still open here only when dup() failed */
+        if (fd >= 0)
+                close (fd);
+        if (newfd >= 0)
+                close (newfd);
+
+        /*
+         * Keep an earlier failure in ret, and do not jump back to 'out' on
+         * an unlink error: that would retry the cleanup forever.
+         */
+        if (unlink (filename) < 0) {
+                fprintf (stderr, "unlink failed : %s\n", strerror (errno));
+                ret = -1;
+        }
+
+        return ret;
+}
+
+/*
+ * Exercise the directory operations: mkdir, opendir/readdir/closedir,
+ * stat, chmod, chown, the path-based xattr calls, chdir/getcwd, rename
+ * and rmdir.
+ *
+ * @dirname: directory to create; it is removed again before returning.
+ *
+ * Returns 0 when every step succeeds and a negative value otherwise.
+ */
+int
+dir_based_fops (char *dirname)
+{
+        int            ret           = -1;
+        int            len           = 0;
+        DIR           *dp            = NULL;
+        char           buff[255]     = {0,};
+        struct dirent *dbuff         = NULL;
+        struct stat    stbuff        = {0,};
+        char           newdname[255] = {0,};
+        char          *cwd           = NULL;
+
+        ret = mkdir (dirname, 0755);
+        if (ret < 0) {
+                fprintf (stderr, "mkdir failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        /* The pointer-returning calls below do not touch ret, so it is
+         * set to -1 explicitly; otherwise the 0 left by the previous
+         * call would report the failure as success. */
+        dp = opendir (dirname);
+        if (dp == NULL) {
+                ret = -1;
+                fprintf (stderr, "opendir failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        dbuff = readdir (dp);
+        if (NULL == dbuff) {
+                ret = -1;
+                fprintf (stderr, "readdir failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = closedir (dp);
+        dp = NULL;      /* the stream is gone whether or not closedir failed */
+        if (ret < 0) {
+                fprintf (stderr, "closedir failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = stat (dirname, &stbuff);
+        if (ret < 0) {
+                fprintf (stderr, "stat failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = chmod (dirname, 0744);
+        if (ret < 0) {
+                fprintf (stderr, "chmod failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = chown (dirname, 10001, 10001);
+        if (ret < 0) {
+                fprintf (stderr, "chown failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        /* the value length 8 includes the terminating NUL of "working" */
+        ret = setxattr (dirname, "trusted.xattr-test", "working", 8, 0);
+        if (ret < 0) {
+                fprintf (stderr, "setxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = listxattr (dirname, NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "listxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = getxattr (dirname, "trusted.xattr-test", NULL, 0);
+        if (ret <= 0) {
+                ret = -1;
+                fprintf (stderr, "getxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = removexattr (dirname, "trusted.xattr-test");
+        if (ret < 0) {
+                fprintf (stderr, "removexattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        /* bounded formatting: a long dirname must not overflow newdname */
+        len = snprintf (newdname, sizeof (newdname), "%s/../", dirname);
+        if (len < 0 || (size_t) len >= sizeof (newdname)) {
+                ret = -1;
+                fprintf (stderr, "path too long: %s\n", dirname);
+                goto out;
+        }
+        ret = chdir (newdname);
+        if (ret < 0) {
+                fprintf (stderr, "chdir failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        cwd = getcwd (buff, sizeof (buff));
+        if (NULL == cwd) {
+                ret = -1;
+                fprintf (stderr, "getcwd failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        len = snprintf (newdname, sizeof (newdname), "%snew", dirname);
+        if (len < 0 || (size_t) len >= sizeof (newdname)) {
+                ret = -1;
+                fprintf (stderr, "path too long: %s\n", dirname);
+                goto out;
+        }
+        ret = rename (dirname, newdname);
+        if (ret < 0) {
+                fprintf (stderr, "rename failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = rmdir (newdname);
+        if (ret < 0) {
+                fprintf (stderr, "rmdir failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+out:
+        if (dp)
+                closedir (dp);
+        /* best effort: after a successful rename this fails harmlessly */
+        rmdir (dirname);
+        return ret;
+}
+
+/*
+ * Exercise the link operations: hard link, symlink, lstat, lchown and
+ * the l*xattr calls on the symlink.
+ *
+ * @filename: scratch file; "<filename>_hlink" and "<filename>_slink" are
+ *            created next to it.  All three are removed before returning.
+ *
+ * Returns 0 when every step succeeds and a negative value otherwise.
+ */
+int
+link_based_fops (char *filename)
+{
+        int         ret           = -1;
+        int         len           = 0;
+        int         fd            = -1;
+        char        newname[255]  = {0,};
+        char        linkname[255] = {0,};
+        struct stat lstbuf        = {0,};
+
+        fd = creat (filename, 0644);
+        if (fd < 0) {
+                fprintf (stderr, "creat failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        /* bounded formatting: a long filename must not overflow newname */
+        len = snprintf (newname, sizeof (newname), "%s_hlink", filename);
+        if (len < 0 || (size_t) len >= sizeof (newname)) {
+                ret = -1;
+                newname[0] = '\0';
+                fprintf (stderr, "path too long: %s\n", filename);
+                goto out;
+        }
+        ret = link (filename, newname);
+        if (ret < 0) {
+                fprintf (stderr, "link failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = unlink (filename);
+        if (ret < 0) {
+                fprintf (stderr, "unlink failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        len = snprintf (linkname, sizeof (linkname), "%s_slink", filename);
+        if (len < 0 || (size_t) len >= sizeof (linkname)) {
+                ret = -1;
+                linkname[0] = '\0';
+                fprintf (stderr, "path too long: %s\n", filename);
+                goto out;
+        }
+        ret = symlink (newname, linkname);
+        if (ret < 0) {
+                fprintf (stderr, "symlink failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = lstat (linkname, &lstbuf);
+        if (ret < 0) {
+                fprintf (stderr, "lstat failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = lchown (linkname, 10001, 10001);
+        if (ret < 0) {
+                fprintf (stderr, "lchown failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        /* the value length 8 includes the terminating NUL of "working" */
+        ret = lsetxattr (linkname, "trusted.lxattr-test", "working", 8, 0);
+        if (ret < 0) {
+                fprintf (stderr, "lsetxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = llistxattr (linkname, NULL, 0);
+        if (ret < 0) {
+                ret = -1;
+                fprintf (stderr, "llistxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = lgetxattr (linkname, "trusted.lxattr-test", NULL, 0);
+        if (ret < 0) {
+                ret = -1;
+                fprintf (stderr, "lgetxattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+        ret = lremovexattr (linkname, "trusted.lxattr-test");
+        if (ret < 0) {
+                fprintf (stderr, "lremovexattr failed: %s\n", strerror (errno));
+                goto out;
+        }
+
+out:
+        if (fd >= 0)
+                close (fd);
+        /* best-effort cleanup; names not created yet are still empty */
+        if (linkname[0])
+                unlink (linkname);
+        if (newname[0])
+                unlink (newname);
+        unlink (filename);
+        /* the original fell off the end of a non-void function here */
+        return ret;
+}
+
+int generic_open_read_write (char *filename, int flag);
+
+/*
+ * Run generic_open_read_write() once for each supported open(2) flag
+ * combination and compare its result with the value expected for that
+ * combination (3, "read failed", for write-only opens; 0 otherwise).
+ *
+ * O_TRUNC|O_RDONLY is deliberately not covered: its behaviour is
+ * unspecified and cannot be tested reliably.
+ *
+ * @filename: scratch file; generic_open_read_write() removes it after
+ *            each round, so rows marked "precreate" create it first.
+ *
+ * Returns 0 when every combination behaves as expected and non-zero at
+ * the first mismatch.
+ */
+int
+test_open_modes (char *filename)
+{
+        static const struct {
+                int         flags;     /* passed to open(2) */
+                int         precreate; /* create the file beforehand */
+                int         expected;  /* generic_open_read_write() result */
+                const char *failmsg;   /* printed verbatim on mismatch */
+        } cases[] = {
+                { O_CREAT|O_WRONLY,      0, 3,
+                  "flag O_CREAT|O_WRONLY failed: \n" },
+                { O_CREAT|O_RDWR,        0, 0,
+                  "flag O_CREAT|O_RDWR failed\n" },
+                { O_CREAT|O_RDONLY,      0, 0,
+                  "flag O_CREAT|O_RDONLY failed\n" },
+                { O_WRONLY,              1, 3,
+                  "flag O_WRONLY failed\n" },
+                { O_RDWR,                1, 0,
+                  "flag O_RDWR failed\n" },
+                { O_RDONLY,              1, 0,
+                  "flag O_RDONLY failed\n" },
+                { O_TRUNC|O_WRONLY,      1, 3,
+                  "flag O_TRUNC|O_WRONLY failed\n" },
+                { O_CREAT|O_RDWR|O_SYNC, 0, 0,
+                  "flag O_CREAT|O_RDWR|O_SYNC failed\n" },
+                { O_CREAT|O_EXCL,        1, 0,
+                  "flag O_CREAT|O_EXCL failed\n" },
+        };
+        size_t i   = 0;
+        int    fd  = -1;
+        int    ret = -1;
+
+        for (i = 0; i < sizeof (cases) / sizeof (cases[0]); i++) {
+                if (cases[i].precreate) {
+                        fd = creat (filename, 0644);
+                        if (fd < 0) {
+                                fprintf (stderr, "creat failed: %s\n",
+                                         strerror (errno));
+                                return -1;
+                        }
+                        close (fd);
+                }
+
+                ret = generic_open_read_write (filename, cases[i].flags);
+                if (ret != cases[i].expected) {
+                        fputs (cases[i].failmsg, stderr);
+                        /* a stray 0 where 3 was expected is still a
+                         * failure; never hand it back as success */
+                        return ret ? ret : -1;
+                }
+        }
+
+        return 0;
+}
+
+/*
+ * Open @filename with @flag, write a fixed string, seek back to the
+ * start, read it again and compare.  The file is removed before
+ * returning on every path except an open failure other than EEXIST.
+ *
+ * Return values:
+ *   0  success, or open failed with EEXIST for an O_CREAT|O_EXCL open
+ *   1  open failed
+ *   2  write failed with an error other than EBADF
+ *   3  read failed
+ *   4  lseek failed, or the data read back differs from what was written
+ */
+int generic_open_read_write (char *filename, int flag)
+{
+        int  fd          = -1;
+        int  ret         = -1;
+        int  readonly    = 0;
+        char wstring[50] = {0,};
+        char rstring[50] = {0,};
+
+        /* the mode is only consulted when flag contains O_CREAT */
+        fd = open (filename, flag, 0644);
+        if (fd < 0) {
+                /* "==" binds tighter than "|": compare against the
+                 * combined mask, not against O_CREAT alone */
+                if ((flag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) &&
+                    errno == EEXIST) {
+                        unlink (filename);
+                        return 0;
+                }
+
+                fprintf (stderr, "open failed: %s\n", strerror (errno));
+                return 1;
+        }
+
+        strcpy (wstring, "My string to write\n");
+        ret = write (fd, wstring, strlen(wstring));
+        if (ret <= 0) {
+                /* EBADF is what a descriptor opened read-only gives */
+                if (errno != EBADF) {
+                        fprintf (stderr, "write failed: %s\n", strerror (errno));
+                        close (fd);
+                        unlink(filename);
+                        return 2;
+                }
+        }
+
+        ret = lseek (fd, 0, SEEK_SET);
+        if (ret < 0) {
+                close (fd);
+                unlink(filename);
+                return 4;
+        }
+
+        ret = read (fd, rstring, strlen(wstring));
+        if (ret < 0) {
+                close (fd);
+                unlink (filename);
+                return 3;
+        }
+
+        /* Nothing was written for O_RDONLY, O_CREAT|O_RDONLY and
+         * O_TRUNC|O_RDONLY, so a mismatch is not an error for them. */
+        readonly = (flag == (O_CREAT|O_RDONLY) || flag == O_RDONLY ||
+                    flag == (O_TRUNC|O_RDONLY));
+
+        ret = memcmp (wstring, rstring, strlen (wstring));
+        if (0 != ret && !readonly) {
+                fprintf (stderr, "read is returning junk\n");
+                close (fd);
+                unlink (filename);
+                return 4;
+        }
+
+        close (fd);
+        unlink (filename);
+        return 0;
+}
diff --git a/extras/volfilter.py b/extras/volfilter.py
new file mode 100644
index 000000000..0ca456a78
--- /dev/null
+++ b/extras/volfilter.py
@@ -0,0 +1,167 @@
+# Copyright (c) 2010-2011 Red Hat, Inc.
+#
+# This file is part of HekaFS.
+#
+# HekaFS is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License, version 3, as published by the Free
+# Software Foundation.
+#
+# HekaFS is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License * along
+# with HekaFS. If not, see <http://www.gnu.org/licenses/>.
+
+import copy
+import string
+import sys
+import types
+
+good_xlators = [
+ "cluster/afr",
+ "cluster/dht",
+ "cluster/distribute",
+ "cluster/replicate",
+ "cluster/stripe",
+ "debug/io-stats",
+ "features/access-control",
+ "features/locks",
+ "features/marker",
+ "features/uidmap",
+ "performance/io-threads",
+ "protocol/client",
+ "protocol/server",
+ "storage/posix",
+]
+
+def copy_stack (old_xl,suffix,recursive=False):
+    # Clone old_xl and, recursively, everything beneath it.  The top of
+    # the copy is named after suffix alone; every translator below it is
+    # named "<old name>-<suffix>".
+    if recursive:
+        clone = Translator(old_xl.name + "-" + suffix)
+    else:
+        clone = Translator(suffix)
+    clone.type = old_xl.type
+    # Deep copy, so that the clone does not share its option dict with
+    # the original.
+    clone.opts = copy.deepcopy(old_xl.opts)
+    clone.subvols.extend([copy_stack(child,suffix,True)
+                          for child in old_xl.subvols])
+    # Give every copied brick its own directory below the original one.
+    if clone.type == "storage/posix":
+        clone.opts["directory"] = clone.opts["directory"] + ("/" + suffix)
+    return clone
+
+def cleanup (parent, graph):
+    # Return the graph below parent with every translator whose type is
+    # not listed in good_xlators removed; a removed translator is
+    # replaced by the cleaned-up version of its first subvolume.
+    if parent.type not in good_xlators:
+        return cleanup(parent.subvols[0],graph)
+    # Temporary fix so that HekaFS volumes can use the SSL-enabled
+    # multi-threaded socket transport.
+    ssl_types = {
+        "protocol/server": "protocol/server2",
+        "protocol/client": "protocol/client2",
+    }
+    if parent.type in ssl_types:
+        parent.type = ssl_types[parent.type]
+        parent.opts["transport-type"] = "ssl"
+    parent.subvols = [cleanup(child,graph) for child in parent.subvols]
+    return parent
+
+class Translator:
+    # One "volume ... end-volume" stanza of a volfile.
+    #   name    - volume name
+    #   type    - translator type string, e.g. "storage/posix"
+    #   opts    - option name -> value
+    #   subvols - child Translator objects, in volfile order
+    #   dumped  - set once generate() has written this translator
+    def __init__ (self, name):
+        self.name = name
+        self.type = ""
+        self.dumped = False
+        # fresh containers per instance, never shared
+        self.opts = dict()
+        self.subvols = list()
+    def __repr__ (self):
+        return "<Translator %s>" % self.name
+
+def load (path):
+    # Parse a volfile into Translator objects.
+    #
+    # path is either a file name or an already-open file-like object
+    # (anything with readline(), most notably a urllib* response).  A
+    # file opened here is closed again before returning; an object
+    # passed in by the caller is left open.
+    #
+    # Returns (all_xlators, last_xlator): a dict mapping volume name to
+    # Translator, and the translator closed by the last "end-volume"
+    # (None when the input defines no volume).
+    #
+    # Raises RuntimeError for structural errors.  A "subvolumes" line
+    # naming a volume that has not been defined yet raises KeyError.
+    opened_here = type(path) in types.StringTypes
+    if opened_here:
+        fp = file(path,"r")
+    else:
+        fp = path
+    all_xlators = {}
+    xlator = None
+    last_xlator = None
+    try:
+        while True:
+            text = fp.readline()
+            if text == "":
+                break
+            text = text.split()
+            if not len(text):
+                continue
+            if text[0] == "volume":
+                if xlator:
+                    raise RuntimeError("nested volume definition")
+                xlator = Translator(text[1])
+                continue
+            if not xlator:
+                raise RuntimeError("text outside volume definition")
+            if text[0] == "type":
+                xlator.type = text[1]
+                continue
+            if text[0] == "option":
+                xlator.opts[text[1]] = " ".join(text[2:])
+                continue
+            if text[0] == "subvolumes":
+                for sv in text[1:]:
+                    xlator.subvols.append(all_xlators[sv])
+                continue
+            if text[0] == "end-volume":
+                all_xlators[xlator.name] = xlator
+                last_xlator = xlator
+                xlator = None
+                continue
+            raise RuntimeError("unrecognized keyword %s" % text[0])
+    finally:
+        # Do not leak the handle, including when parsing fails.
+        if opened_here:
+            fp.close()
+    if xlator:
+        raise RuntimeError("unclosed volume definition")
+    return all_xlators, last_xlator
+
+def generate (graph, last, stream=sys.stdout):
+    # Write the volfile text for last and everything below it to stream.
+    # Subvolumes come first; one that is already marked as dumped is not
+    # written a second time.
+    for child in last.subvols:
+        if child.dumped:
+            continue
+        generate(graph,child,stream)
+        stream.write("\n")
+        child.dumped = True
+    stream.write("volume %s\n" % last.name)
+    stream.write(" type %s\n" % last.type)
+    for key, value in last.opts.items():
+        stream.write(" option %s %s\n" % (key, value))
+    if last.subvols:
+        child_names = [child.name for child in last.subvols]
+        stream.write(" subvolumes %s\n" % " ".join(child_names))
+    stream.write("end-volume\n")
+
+def push_filter (graph, old_xl, filt_type, opts=None):
+    # Insert a translator of type filt_type directly above old_xl.
+    #
+    # old_xl itself is turned into the filter (renamed, retyped, given
+    # opts), so everything that already points at it now goes through
+    # the filter.  Its former contents move into a new translator named
+    # "<base>-<original type suffix>", which becomes the filter's only
+    # subvolume.  Both translators are registered in graph.
+    #
+    # opts defaults to a fresh empty dict on every call.  A shared "{}"
+    # default would be stored on the translator, so options set on one
+    # filter would leak into every later one.
+    if opts is None:
+        opts = {}
+    suffix = "-" + old_xl.type.split("/")[1]
+    # Strip an existing "-<type>" suffix so it is not doubled below.
+    if len(old_xl.name) > len(suffix):
+        if old_xl.name[-len(suffix):] == suffix:
+            old_xl.name = old_xl.name[:-len(suffix)]
+    new_xl = Translator(old_xl.name+suffix)
+    new_xl.type = old_xl.type
+    new_xl.opts = old_xl.opts
+    new_xl.subvols = old_xl.subvols
+    graph[new_xl.name] = new_xl
+    old_xl.name += ("-" + filt_type.split("/")[1])
+    old_xl.type = filt_type
+    old_xl.opts = opts
+    old_xl.subvols = [new_xl]
+    graph[old_xl.name] = old_xl
+
+def delete (graph, victim):
+    # Splice victim out of the graph: every reference to it in any
+    # translator's subvolume list is replaced by victim's only
+    # subvolume.  The entry for victim in graph itself is left alone.
+    if len(victim.subvols) != 1:
+        raise RuntimeError("attempt to delete non-unary translator")
+    for xl in graph.values():
+        for idx, sv in enumerate(xl.subvols):
+            if sv == victim:
+                xl.subvols[idx] = victim.subvols[0]
+
+# Command-line use: parse the volfile named by the first argument and
+# write it back to stdout in generate()'s format.
+if __name__ == "__main__":
+ graph, last = load(sys.argv[1])
+ generate(graph,last)
diff --git a/extras/who-wrote-glusterfs/gitdm.aliases b/extras/who-wrote-glusterfs/gitdm.aliases
new file mode 100644
index 000000000..784a3e3bc
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.aliases
@@ -0,0 +1,48 @@
+#
+# This is the email aliases file, mapping secondary addresses onto a single,
+# canonical address. This file should probably match the contents of .mailmap
+# in the root of the git repository.
+#
+# Format: <alias> <real>
+
+amar@gluster.com amarts@redhat.com
+amar@del.gluster.com amarts@redhat.com
+avati@amp.gluster.com avati@redhat.com
+avati@blackhole.gluster.com avati@redhat.com
+avati@dev.gluster.com avati@redhat.com
+avati@gluster.com avati@redhat.com
+wheelear@gmail.com awheeler@redhat.com
+anush@gluster.com ashetty@redhat.com
+csaba@gluster.com csaba@redhat.com
+csaba@lowlife.hu csaba@redhat.com
+csaba@zresearch.com csaba@redhat.com
+harsha@gluster.com fharshav@redhat.com
+harsha@zresearch.com fharshav@redhat.com
+harsha@dev.gluster.com fharshav@redhat.com
+harsha@harshavardhana.net fharshav@redhat.com
+kkeithle@f16node1.kkeithle.usersys.redhat.com kkeithle@redhat.com
+kaushal@gluster.com kaushal@redhat.com
+kaushikbv@gluster.com kbudiger@redhat.com
+krishna@gluster.com ksriniva@redhat.com
+krishna@zresearch.com ksriniva@redhat.com
+krishna@guest-laptop ksriniva@redhat.com
+kp@gluster.com kparthas@redhat.com
+me@louiszuckerman.com louiszuckerman@gmail.com
+msvbhat@gmail.com vbhat@redhat.com
+vishwanath@gluster.com vbhat@redhat.com
+pavan@dev.gluster.com pavan@gluster.com
+zaitcev@yahoo.com zaitcev@kotori.zaitcev.us
+pranithk@gluster.com pkarampu@redhat.com
+raghavendrabhat@gluster.com raghavendra@redhat.com
+raghavendra@gluster.com rgowdapp@redhat.com
+raghavendra@zresearch.com rgowdapp@redhat.com
+rahulcssjce@gmail.com rahulcs@redhat.com
+rajesh@gluster.com rajesh@redhat.com
+rajesh.amaravathi@gmail.com rajesh@redhat.com
+shehjart@zresearch.com shehjart@gluster.com
+venky@gluster.com vshankar@redhat.com
+vijay@gluster.com vbellur@redhat.com
+vijay@dev.gluster.com vbellur@redhat.com
+vijaykumar.koppad@gmail.com vkoppad@redhat.com
+vikas@zresearch.com vikas@gluster.com
+shishirng@gluster.com sgowda@redhat.com
diff --git a/extras/who-wrote-glusterfs/gitdm.config b/extras/who-wrote-glusterfs/gitdm.config
new file mode 100644
index 000000000..e1ff2bd5b
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.config
@@ -0,0 +1,8 @@
+#
+# This is the gitdm configuration file for GlusterFS.
+# See the gitdm.config in the gitdm repository for additional options and
+# comments.
+#
+
+EmailAliases gitdm.aliases
+EmailMap gitdm.domain-map
diff --git a/extras/who-wrote-glusterfs/gitdm.domain-map b/extras/who-wrote-glusterfs/gitdm.domain-map
new file mode 100644
index 000000000..f1c305898
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.domain-map
@@ -0,0 +1,15 @@
+#
+# Here is a set of mappings of domain names onto employer names.
+#
+active.by ActiveCloud
+cern.ch CERN
+gluster.com Red Hat
+gooddata.com GoodData
+hastexo.com hastexo
+ibm.com IBM
+linbit.com LINBIT
+netbsd.org NetBSD
+netdirect.ca Net Direct
+redhat.com Red Hat
+stepping-stone.ch stepping stone GmbH
+zresearch.com Red Hat
diff --git a/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
new file mode 100755
index 000000000..487f5874b
--- /dev/null
+++ b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+#
+# Gather statistics on "Who wrote GlusterFS". The idea comes from the excellent
+# articles on http://lwn.net/ named "Who wrote <linux-version>?".
+#
+# gitdm comes from git://git.lwn.net/gitdm.git by Jonathan Corbet.
+#
+# Configuration files used:
+# - gitdm.config: main configuration file, pointing to the others
+# - gitdm.aliases: merge users with different email addresses into one
+# - gitdm.domain-map: map domain names from email addresses to companies
+#
+
+DIRNAME=$(dirname $0)
+
+GITDM_REPO=git://git.lwn.net/gitdm.git
+GITDM_DIR=${DIRNAME}/gitdm
+GITDM_CMD="python ${GITDM_DIR}/gitdm"
+
+# Print the arguments as one line on stderr and return the exit status
+# that was current when error() was called.
+error()
+{
+    local ret=${?}
+    # Print the message as data, never as a printf format, so a '%' or
+    # backslash in it is shown literally.  ">&2" does not depend on
+    # /dev/stderr existing.
+    printf '%s\n' "${*}" >&2
+    return ${ret}
+}
+
+# Make sure a gitdm checkout exists in ${GITDM_DIR}; clone it from
+# ${GITDM_REPO} when the gitdm script is missing.  The return status is
+# that of "git clone" when a clone was attempted, 0 otherwise.
+check_gitdm()
+{
+    if [ ! -e "${GITDM_DIR}/gitdm" ]
+    then
+        # Use the variables defined at the top of the script instead of
+        # repeating the URL and the target directory here.
+        git clone --quiet "${GITDM_REPO}" "${GITDM_DIR}"
+    fi
+}
+
+# The first argument is the revision-range (see 'git rev-list --help').
+# REV can be empty, and the statistics will be calculated over the whole
+# current branch.
+REV=${1}
+# Only shift when there is something to shift: with no positional
+# parameters "shift" is an error, and some shells abort the script on it.
+if [ ${#} -gt 0 ]
+then
+    shift
+fi
+# all remaining options are passed to gitdm, see the gitdm script for an
+# explanation of the accepted options.
+GITDM_OPTS=${@}
+
+if ! check_gitdm
+then
+    error "Could not find 'gitdm', exiting..."
+    exit 1
+fi
+
+git log --numstat -M ${REV} | ${GITDM_CMD} -b ${DIRNAME} -n ${GITDM_OPTS}
diff --git a/gen-headers.py b/gen-headers.py
new file mode 100755
index 000000000..ef9fa7711
--- /dev/null
+++ b/gen-headers.py
@@ -0,0 +1,54 @@
+#!/usr/bin/python
+
+import sys
+try:
+ import json
+except ImportError:
+ import simplejson as json
+from string import Template
+
+
+def getLogBook(logFile='error-codes.json'):
+    """Return the parsed contents of the JSON file logFile.
+
+    Raises IOError when the file cannot be opened.
+    """
+    fp = open(logFile)
+    # try/finally instead of "with" keeps this usable on the old
+    # interpreters that the simplejson fallback at the top of this file
+    # caters for.
+    try:
+        return json.load(fp)
+    finally:
+        fp.close()
+
+
+def genCHeader(logBook,
+               infile='gf-error-codes.h.template',
+               outfile='gf-error-codes.h'):
+    """Generate a C header from logBook.
+
+    logBook maps an error name to a dict holding 'code' (an integer) and
+    'message' (a dict with at least an 'en' entry).  For every entry two
+    #define lines (code and message) and one "case" line are produced.
+    They replace $DEFINES and $CASES in the string.Template read from
+    infile, and the result is written to outfile.
+
+    Raises IOError when either file cannot be opened.
+    """
+    # Read the template named by the caller; the original ignored infile
+    # and always opened the default name.
+    fp = open(infile)
+    try:
+        template = Template(fp.read())
+    finally:
+        fp.close()
+
+    defineLines = []
+    caseLines = []
+    for name, value in logBook.items():
+        nameDef = "GF_%s" % (name.upper(),)
+        code = value['code']
+        msgNameDef = "%s_MSG" % (nameDef,)
+        msg = value['message']['en']
+
+        defineLines.append("#define %-20s %d" % (nameDef, code))
+        # json.dumps() gives a quoted, escaped string literal
+        defineLines.append("#define %-20s %s" % (msgNameDef,
+                                                 json.dumps(msg)))
+        caseLines.append(" case %s: return _(%s);" % \
+                         (nameDef, msgNameDef))
+
+    d = {'DEFINES': "\n".join(defineLines),
+         'CASES': "\n".join(caseLines)}
+
+    fp = open(outfile, 'w')
+    try:
+        fp.write(template.substitute(d))
+    finally:
+        fp.close()
+
+
+if __name__ == "__main__":
+    # Build the header from the default JSON file and template.  An I/O
+    # problem is printed and reported through the exit status.
+    try:
+        genCHeader(getLogBook())
+    except IOError, e:
+        print str(e)
+        sys.exit(-1)
+    else:
+        sys.exit(0)
diff --git a/xlators/features/marker/utils/Makefile.am b/geo-replication/Makefile.am
index 556951d9f..556951d9f 100644
--- a/xlators/features/marker/utils/Makefile.am
+++ b/geo-replication/Makefile.am
diff --git a/geo-replication/src/Makefile.am b/geo-replication/src/Makefile.am
new file mode 100644
index 000000000..324d8869f
--- /dev/null
+++ b/geo-replication/src/Makefile.am
@@ -0,0 +1,33 @@
+
+gsyncddir = $(libexecdir)/glusterfs
+
+gsyncd_SCRIPTS = gverify.sh peer_add_secret_pub peer_gsec_create
+
+# peer_gsec_create and peer_add_secret_pub are not added to
+# EXTRA_DIST because they are generated from .in files
+EXTRA_DIST = gverify.sh
+
+gsyncd_PROGRAMS = gsyncd
+
+gsyncd_SOURCES = gsyncd.c procdiggy.c
+
+gsyncd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(GF_GLUSTERFS_LIBS)
+
+gsyncd_LDFLAGS = $(GF_LDFLAGS)
+
+noinst_HEADERS = procdiggy.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src\
+ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
+ -DUSE_LIBGLUSTERFS\
+ -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/marker/utils/src/gsyncd.c b/geo-replication/src/gsyncd.c
index 9c598ce66..0830e7f9b 100644
--- a/xlators/features/marker/utils/src/gsyncd.c
+++ b/geo-replication/src/gsyncd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -29,6 +19,17 @@
#include <string.h>
#include <sys/param.h> /* for PATH_MAX */
+/* NOTE (USE_LIBGLUSTERFS):
+ * ------------------------
+ * When the USE_LIBGLUSTERFS preprocessor symbol is defined, perform
+ * glusterfs translator-like initialization so that the glusterfs
+ * globals and contexts are valid when glusterfs APIs are invoked.
+ * The symbol is passed unconditionally when building the gsyncd binary.
+ */
+#ifdef USE_LIBGLUSTERFS
+#include "glusterfs.h"
+#include "globals.h"
+#endif
#include "common-utils.h"
#include "run.h"
@@ -36,7 +37,7 @@
#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_"
#define _GSYNCD_DISPATCHED_ "_GSYNCD_DISPATCHED_"
-#define GSYNCD_CONF "geo-replication/gsyncd.conf"
+#define GSYNCD_CONF_TEMPLATE "geo-replication/gsyncd_template.conf"
#define GSYNCD_PY "gsyncd.py"
#define RSYNC "rsync"
@@ -46,12 +47,14 @@ static int
duplexpand (void **buf, size_t tsiz, size_t *len)
{
size_t osiz = tsiz * *len;
-
- *buf = realloc (*buf, osiz << 1);
- if (!buf)
+ char *p = realloc (*buf, osiz << 1);
+ if (!p) {
+ free(*buf);
return -1;
+ }
- memset ((char *)*buf + osiz, 0, osiz);
+ memset (p + osiz, 0, osiz);
+ *buf = p;
*len <<= 1;
return 0;
@@ -61,6 +64,7 @@ static int
str2argv (char *str, char ***argv)
{
char *p = NULL;
+ char *savetok = NULL;
int argc = 0;
size_t argv_len = 32;
int ret = 0;
@@ -74,7 +78,7 @@ str2argv (char *str, char ***argv)
if (!*argv)
goto error;
- while ((p = strtok (str, " "))) {
+ while ((p = strtok_r (str, " ", &savetok))) {
str = NULL;
argc++;
@@ -104,8 +108,10 @@ invoke_gsyncd (int argc, char **argv)
int i = 0;
int j = 0;
char *nargv[argc + 4];
+ char *python = NULL;
if (restricted) {
+ size_t len;
/* in restricted mode we forcibly use the system-wide config */
runinit (&runner);
runner_add_args (&runner, SBIN_DIR"/gluster",
@@ -115,16 +121,17 @@ invoke_gsyncd (int argc, char **argv)
if (runner_start (&runner) == 0 &&
fgets (config_file, PATH_MAX,
runner_chio (&runner, STDOUT_FILENO)) != NULL &&
- config_file[strlen (config_file) - 1] == '\n' &&
+ (len = strlen (config_file)) &&
+ config_file[len - 1] == '\n' &&
runner_end (&runner) == 0)
- gluster_workdir_len = strlen (config_file) - 1;
+ gluster_workdir_len = len - 1;
if (gluster_workdir_len) {
- if (gluster_workdir_len + 1 + strlen (GSYNCD_CONF) + 1 >
+ if (gluster_workdir_len + 1 + strlen (GSYNCD_CONF_TEMPLATE) + 1 >
PATH_MAX)
goto error;
config_file[gluster_workdir_len] = '/';
- strcat (config_file, GSYNCD_CONF);
+ strcat (config_file, GSYNCD_CONF_TEMPLATE);
} else
goto error;
@@ -136,7 +143,10 @@ invoke_gsyncd (int argc, char **argv)
goto error;
j = 0;
- nargv[j++] = PYTHON;
+ python = getenv("PYTHON");
+ if(!python)
+ python = PYTHON;
+ nargv[j++] = python;
nargv[j++] = GSYNCD_PREFIX"/python/syncdaemon/"GSYNCD_PY;
for (i = 1; i < argc; i++)
nargv[j++] = argv[i];
@@ -146,9 +156,9 @@ invoke_gsyncd (int argc, char **argv)
}
nargv[j++] = NULL;
- execvp (PYTHON, nargv);
+ execvp (python, nargv);
- fprintf (stderr, "exec of "PYTHON" failed\n");
+ fprintf (stderr, "exec of '%s' failed\n", python);
return 127;
error:
@@ -275,6 +285,46 @@ invoke_rsync (int argc, char **argv)
return 1;
}
+/*
+ * Allow-list wrapper around the gluster CLI.
+ *
+ * The arguments that are not treated as options (see below) must be
+ * "volume", then "info", optionally followed by one more word; anything
+ * else is refused.  On acceptance the process image is replaced by
+ * SBIN_DIR"/gluster" with the argument vector passed through unchanged
+ * apart from argv[0].
+ *
+ * Returns 1 for a disallowed invocation and 127 when exec fails; it does
+ * not return when exec succeeds.
+ */
+static int
+invoke_gluster (int argc, char **argv)
+{
+ int i = 0;
+ int j = 0;
+ int optsover = 0;
+ char *ov = NULL;
+
+ for (i = 1; i < argc; i++) {
+ /* NOTE(review): strtail() is defined elsewhere; presumably it
+  * returns the text following the "--" prefix, or NULL when
+  * argv[i] does not start with it -- confirm. */
+ ov = strtail (argv[i], "--");
+ if (ov && !optsover) {
+ /* a bare "--" ends option processing; every later word is
+  * then counted as a positional argument */
+ if (*ov == '\0')
+ optsover = 1;
+ continue;
+ }
+ /* j counts the positional arguments seen so far */
+ switch (++j) {
+ case 1:
+ if (strcmp (argv[i], "volume") != 0)
+ goto error;
+ break;
+ case 2:
+ if (strcmp (argv[i], "info") != 0)
+ goto error;
+ break;
+ case 3:
+ /* third positional argument: accepted without inspection */
+ break;
+ default:
+ /* more than three positional arguments */
+ goto error;
+ }
+ }
+
+ argv[0] = "gluster";
+ execvp (SBIN_DIR"/gluster", argv);
+ /* only reached when execvp() failed */
+ fprintf (stderr, "exec of gluster failed\n");
+ return 127;
+
+ error:
+ fprintf (stderr, "disallowed gluster invocation\n");
+ return 1;
+}
struct invocable {
char *name;
@@ -282,8 +332,9 @@ struct invocable {
};
struct invocable invocables[] = {
- { "rsync", invoke_rsync },
- { "gsyncd", invoke_gsyncd },
+ { "rsync", invoke_rsync },
+ { "gsyncd", invoke_gsyncd },
+ { "gluster", invoke_gluster },
{ NULL, NULL}
};
@@ -295,6 +346,19 @@ main (int argc, char **argv)
char *b = NULL;
char *sargv = NULL;
+#ifdef USE_LIBGLUSTERFS
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx)
+ return ENOMEM;
+
+ if (glusterfs_globals_init (ctx))
+ return 1;
+
+ THIS->ctx = ctx;
+#endif
+
evas = getenv (_GLUSTERD_CALLED_);
if (evas && strcmp (evas, "1") == 0)
/* OK, we know glusterd called us, no need to look for further config
diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
new file mode 100755
index 000000000..bd1b25f24
--- /dev/null
+++ b/geo-replication/src/gverify.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+
+# Script to verify the Master and Slave Gluster compatibility.
+# To use ./gverify <master volume> <slave host> <slave volume>
+# Returns 0 if master and slave compatible.
+
+# Safety margin of 100 MB, expressed in bytes (100 * 1024 * 1024).
+BUFFER_SIZE=104857600;
+slave_log_file=`gluster --print-logdir`/geo-replication-slaves/slave.log
+
+# ssh wrapper used for the slave checks: quiet, password authentication
+# and strict host-key checking disabled, ControlMaster set to yes.  All
+# arguments are passed on to ssh.
+function SSHM()
+{
+    local -a ssh_opts=(
+        -q
+        -oPasswordAuthentication=no
+        -oStrictHostKeyChecking=no
+        -oControlMaster=yes
+    )
+    ssh "${ssh_opts[@]}" "$@";
+}
+
+# Print, as a single line, the shell snippet that collects the master
+# volume statistics.  $1 is the volume name.
+#
+# The generated snippet mounts the volume on a temporary directory,
+# prints "0:0" and exits 1 unless the mount point has inode number 1,
+# and otherwise prints "<size>:<gluster version>", where <size> is the
+# second column of the last line of df output.
+function cmd_master()
+{
+ VOL=$1;
+ local cmd_line;
+ # In the here-document below, $VOL and $slave_log_file are expanded
+ # now, while everything escaped as \$ is left for the shell that
+ # eventually runs the snippet.
+ cmd_line=$(cat <<EOF
+function do_verify() {
+v=\$1;
+d=\$(mktemp -d 2>/dev/null);
+glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id \$v -l $slave_log_file \$d;
+i=\$(stat -c "%i" \$d);
+if [[ "\$i" -ne "1" ]]; then
+echo 0:0;
+exit 1;
+fi;
+cd \$d;
+available_size=\$(df \$d | tail -1 | awk "{print \\\$2}");
+umount -l \$d;
+rmdir \$d;
+ver=\$(gluster --version | head -1 | cut -f2 -d " ");
+echo \$available_size:\$ver;
+};
+cd /tmp;
+[ x$VOL != x ] && do_verify $VOL;
+EOF
+);
+
+# Unquoted on purpose: word splitting joins the snippet into one line,
+# which is why every statement above ends with ';'.
+echo $cmd_line;
+}
+
+# Print, as a single line, the shell snippet that collects the slave
+# volume statistics.  $1 is the volume name.
+#
+# The generated snippet mounts the volume on a temporary directory,
+# prints "0:0" and exits 1 unless the mount point has inode number 1,
+# and otherwise prints "<size>:<gluster version>:<empty marker>:".
+# <size> is the fourth column of the last line of df output; <empty
+# marker> is the output of "find -maxdepth 0 -empty" on the mount point,
+# i.e. non-empty only when the volume root has no entries.
+function cmd_slave()
+{
+ VOL=$1;
+ local cmd_line;
+ # In the here-document below, $VOL and $slave_log_file are expanded
+ # now, while everything escaped as \$ is left for the shell that
+ # eventually runs the snippet.
+ cmd_line=$(cat <<EOF
+function do_verify() {
+v=\$1;
+d=\$(mktemp -d 2>/dev/null);
+glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id \$v -l $slave_log_file \$d;
+i=\$(stat -c "%i" \$d);
+if [[ "\$i" -ne "1" ]]; then
+echo 0:0;
+exit 1;
+fi;
+cd \$d;
+available_size=\$(df \$d | tail -1 | awk "{print \\\$4}");
+no_of_files=\$(find \$d -maxdepth 0 -empty);
+umount -l \$d;
+rmdir \$d;
+ver=\$(gluster --version | head -1 | cut -f2 -d " ");
+echo \$available_size:\$ver:\$no_of_files:;
+};
+cd /tmp;
+[ x$VOL != x ] && do_verify $VOL;
+EOF
+);
+
+# Unquoted on purpose: word splitting joins the snippet into one line,
+# which is why every statement above ends with ';'.
+echo $cmd_line;
+}
+
+# Run the snippet produced by cmd_master for volume $1 in a local bash
+# and pass its output through.
+function master_stats()
+{
+    MASTERVOL=$1;
+    bash -c "$(cmd_master $MASTERVOL)";
+}
+
+
+# Run the snippet produced by cmd_slave for volume $2 on host $1 through
+# SSHM and pass its output through.
+function slave_stats()
+{
+    SLAVEHOST=$1;
+    SLAVEVOL=$2;
+    # The single quotes keep the snippet together as one argument for
+    # the remote "bash -c".
+    SSHM $SLAVEHOST bash -c "'$(cmd_slave $SLAVEVOL)'";
+}
+
+
+# Verify that master volume $1 can be geo-replicated to volume $3 on
+# host $2.  Diagnostics are written to the log file $4 (discarded when
+# $4 is not given).
+#
+# Exits 1 with a "FORCE_BLOCKER|" message for problems that the force
+# option must not bypass; otherwise exits with the number of bypassable
+# problems found (0 when master and slave are compatible).
+function main()
+{
+    # Without a fourth argument every redirection below would fail on an
+    # empty file name, so fall back to /dev/null.
+    log_file=${4:-/dev/null}
+    > "$log_file"
+
+    # Use FORCE_BLOCKER flag in the error message to differentiate
+    # between the errors which the force command should bypass
+
+    ping -w 5 "$2";
+    if [ $? -ne 0 ]; then
+        echo "FORCE_BLOCKER|$2 not reachable." > "$log_file"
+        exit 1;
+    fi;
+
+    ssh -oNumberOfPasswordPrompts=0 "$2" "echo Testing_Passwordless_SSH";
+    if [ $? -ne 0 ]; then
+        echo "FORCE_BLOCKER|Passwordless ssh login has not been setup with $2." > "$log_file"
+        exit 1;
+    fi;
+
+    ERRORS=0;
+    master_data=$(master_stats "$1");
+    slave_data=$(slave_stats "$2" "$3");
+    master_size=$(echo $master_data | cut -f1 -d':');
+    slave_size=$(echo $slave_data | cut -f1 -d':');
+    master_version=$(echo $master_data | cut -f2 -d':');
+    slave_version=$(echo $slave_data | cut -f2 -d':');
+    slave_no_of_files=$(echo $slave_data | cut -f3 -d':');
+
+    if [[ "x$master_size" = "x" || "x$master_version" = "x" || "$master_size" -eq "0" ]]; then
+        echo "FORCE_BLOCKER|Unable to fetch master volume details. Please check the master cluster and master volume." > "$log_file";
+        exit 1;
+    fi;
+
+    if [[ "x$slave_size" = "x" || "x$slave_version" = "x" || "$slave_size" -eq "0" ]]; then
+        echo "FORCE_BLOCKER|Unable to fetch slave volume details. Please check the slave cluster and slave volume." > "$log_file";
+        exit 1;
+    fi;
+
+    # The above checks are mandatory and force command should be blocked
+    # if they fail. The checks below can be bypassed if force option is
+    # provided hence no FORCE_BLOCKER flag.
+
+    # The sizes come from plain "df", which reports 1K blocks by default,
+    # while BUFFER_SIZE is in bytes: convert before subtracting.
+    # NOTE(review): assumes df is not switched to another block size
+    # through POSIXLY_CORRECT or BLOCK_SIZE in the environment.
+    local buffer_blocks=$(($BUFFER_SIZE / 1024));
+    if [ ! "$slave_size" -ge $(($master_size - $buffer_blocks)) ]; then
+        echo "Total size of master is greater than available size of slave." >> "$log_file";
+        ERRORS=$(($ERRORS + 1));
+    fi;
+
+    # The marker is empty when the slave volume root contains entries.
+    if [ -z "$slave_no_of_files" ]; then
+        echo "$2::$3 is not empty. Please delete existing files in $2::$3 and retry, or use force to continue without deleting the existing files." >> "$log_file";
+        ERRORS=$(($ERRORS + 1));
+    fi;
+
+    # NOTE(review): ">" inside [[ ]] compares strings, so for example
+    # "3.10" sorts before "3.9" -- confirm this is acceptable.
+    if [[ $master_version > $slave_version ]]; then
+        echo "Gluster version mismatch between master and slave." >> "$log_file";
+        ERRORS=$(($ERRORS + 1));
+    fi;
+
+    exit $ERRORS;
+}
+
+
+main "$@";
diff --git a/geo-replication/src/peer_add_secret_pub.in b/geo-replication/src/peer_add_secret_pub.in
new file mode 100644
index 000000000..c036cf334
--- /dev/null
+++ b/geo-replication/src/peer_add_secret_pub.in
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# Append the geo-replication common public key found below
+# $GLUSTERD_WORKING_DIR to the invoking user's authorized_keys, creating
+# the .ssh directory first when it does not exist yet.
+ssh_dir=~/.ssh
+
+if [ ! -d "$ssh_dir" ]; then
+    mkdir "$ssh_dir";
+    chmod 700 "$ssh_dir"
+    chown root:root "$ssh_dir"
+fi
+
+cat "$GLUSTERD_WORKING_DIR"/geo-replication/common_secret.pem.pub >> "$ssh_dir"/authorized_keys
diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in
new file mode 100755
index 000000000..ef630bd44
--- /dev/null
+++ b/geo-replication/src/peer_gsec_create.in
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# prefix and exec_prefix are assigned here because the substituted
+# @libexecdir@ below may refer to them.
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+
+georep_dir="$GLUSTERD_WORKING_DIR"/geo-replication
+
+# Create the key pair when no public key exists yet, removing any stale
+# secret.pem* files first.
+if [ ! -f "$georep_dir"/secret.pem.pub ]; then
+    \rm -rf "$georep_dir"/secret.pem*
+    ssh-keygen -N '' -f "$georep_dir"/secret.pem > /dev/null
+fi
+
+# Print the public key prefixed with a forced-command option that points
+# at gsyncd.  $output is left unquoted so that runs of whitespace
+# collapse to single spaces.
+output="command=\"@libexecdir@/glusterfs/gsyncd\"  $(cat "$georep_dir"/secret.pem.pub)"
+echo $output
diff --git a/xlators/features/marker/utils/src/procdiggy.c b/geo-replication/src/procdiggy.c
index 0baab966d..1eba414c1 100644
--- a/xlators/features/marker/utils/src/procdiggy.c
+++ b/geo-replication/src/procdiggy.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -51,13 +41,15 @@ pidinfo (pid_t pid, char **name)
if (name)
*name = NULL;
for (;;) {
+ size_t len;
memset (buf, 0, sizeof (buf));
if (fgets (buf, sizeof (buf), f) == NULL ||
- buf[strlen (buf) - 1] != '\n') {
+ (len = strlen (buf)) == 0 ||
+ buf[len - 1] != '\n') {
pid = -1;
goto out;
}
- buf[strlen (buf) -1] = '\0';
+ buf[len - 1] = '\0';
if (name && !*name) {
p = strtail (buf, "Name:");
diff --git a/geo-replication/src/procdiggy.h b/geo-replication/src/procdiggy.h
new file mode 100644
index 000000000..56dfc4eb2
--- /dev/null
+++ b/geo-replication/src/procdiggy.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef GSYNCD_PROCDIGGY_H
+#define GSYNCD_PROCDIGGY_H
+
+/* pid_t is used by the prototypes below; include its definition so that
+   this header can be included on its own. */
+#include <sys/types.h>
+
+#ifdef __NetBSD__
+#include <sys/syslimits.h>
+#endif /* __NetBSD__ */
+
+/* Path of the proc filesystem. */
+#define PROC "/proc"
+
+/* Look up information about process @pid; if @name is not NULL, *name is
+   reset to NULL before the lookup starts.
+   NOTE(review): presumably returns the parent pid (or -1 on failure) and
+   stores the process name in *name -- confirm against procdiggy.c. */
+pid_t pidinfo (pid_t pid, char **name);
+
+/* Iterate over processes, handing @pid, @ppid, @name and the caller's
+   @data to the @proch callback.
+   NOTE(review): return value semantics are defined in procdiggy.c --
+   confirm there. */
+int prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data),
+              void *data);
+
+#endif /* GSYNCD_PROCDIGGY_H */
+
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
new file mode 100644
index 000000000..83f969639
--- /dev/null
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -0,0 +1,7 @@
+syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
+
+syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py \
+ resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \
+ $(top_builddir)/contrib/ipaddr-py/ipaddr.py libgfchangelog.py
+
+CLEANFILES =
diff --git a/xlators/features/marker/utils/syncdaemon/README.md b/geo-replication/syncdaemon/README.md
index d45006932..67f346ace 100644
--- a/xlators/features/marker/utils/syncdaemon/README.md
+++ b/geo-replication/syncdaemon/README.md
@@ -11,13 +11,13 @@ Requirements are categorized according to this.
* Python >= 2.5, or 2.4 with Ctypes (see below) (both)
* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave)
* rsync (both)
-* glusterfs with marker support (master); glusterfs (optional on slave)
-* FUSE; for supported versions consult glusterfs
+* glusterfs: with marker and changelog support (master & slave);
+* FUSE: glusterfs fuse module with auxiliary gfid based access support
INSTALLATION
------------
-As of now, the supported way of operation is running from the source directory.
+As of now, the supported way of operation is running from the source directory or using the RPMs given.
If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
@@ -39,41 +39,18 @@ The config file format matches the following syntax:
<option2>: <value2>
# comment
-By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd.conf_
+By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd_template.conf_
in the source tree.
USAGE
-----
gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally.
-Assume we have a gluster volume _pop_ at localhost. We try to set up the following mirrors
-for it with gysncd:
+Assume we have a gluster volume _pop_ at localhost. We want to mirror volume _pop_ to the
+gluster volume _moz_ on a remote machine/cluster at example.com using gsyncd. The
+respective gsyncd invocation is (demoing some syntax sugaring):
-1. _/data/mirror_
-2. local gluster volume _yow_
-3. _/data/far_mirror_ at example.com
-4. gluster volume _moz_ at example.com
-
-The respective gsyncd invocations are (demoing some syntax sugaring):
-
-1.
-
- gsyncd.py gluster://localhost:pop file:///data/mirror
-
- or short form
-
- gsyncd.py :pop /data/mirror
-
-2. `gsyncd :pop :yow`
-3.
-
- gsyncd.py :pop ssh://example.com:/data/far_mirror
-
- or short form
-
- gsyncd.py :pop example.com:/data/far_mirror
-
-4. `gsyncd.py :pop example.com::moz`
+`gsyncd.py :pop example.com::moz`
gsyncd has to be available on both sides; it's location on the remote side has to be specified
via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be
diff --git a/xlators/features/marker/utils/syncdaemon/__codecheck.py b/geo-replication/syncdaemon/__codecheck.py
index e3386afba..e3386afba 100644
--- a/xlators/features/marker/utils/syncdaemon/__codecheck.py
+++ b/geo-replication/syncdaemon/__codecheck.py
diff --git a/xlators/features/marker/utils/syncdaemon/__init__.py b/geo-replication/syncdaemon/__init__.py
index e69de29bb..e69de29bb 100644
--- a/xlators/features/marker/utils/syncdaemon/__init__.py
+++ b/geo-replication/syncdaemon/__init__.py
diff --git a/xlators/features/marker/utils/syncdaemon/configinterface.py b/geo-replication/syncdaemon/configinterface.py
index e55bec519..a326e8246 100644
--- a/xlators/features/marker/utils/syncdaemon/configinterface.py
+++ b/geo-replication/syncdaemon/configinterface.py
@@ -24,9 +24,9 @@ class MultiDict(object):
def __getitem__(self, key):
val = None
for d in self.dicts:
- if d.get(key):
+ if d.get(key) != None:
val = d[key]
- if not val:
+ if val == None:
raise KeyError(key)
return val
diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/geo-replication/syncdaemon/gconf.py
index 146c72a18..fe5795f16 100644
--- a/xlators/features/marker/utils/syncdaemon/gconf.py
+++ b/geo-replication/syncdaemon/gconf.py
@@ -12,9 +12,4 @@ class GConf(object):
permanent_handles = []
log_metadata = {}
- @classmethod
- def setup_ssh_ctl(cls, ctld):
- cls.ssh_ctl_dir = ctld
- cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")]
-
gconf = GConf()
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
index 9ac32ce42..7fcc3165a 100644
--- a/xlators/features/marker/utils/syncdaemon/gsyncd.py
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -2,22 +2,24 @@
import os
import os.path
+import glob
import sys
import time
import logging
import signal
+import shutil
import optparse
import fcntl
import fnmatch
from optparse import OptionParser, SUPPRESS_HELP
-from logging import Logger
+from logging import Logger, handlers
from errno import EEXIST, ENOENT
from ipaddr import IPAddress, IPNetwork
from gconf import gconf
from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception
-from syncdutils import GsyncdError, select, set_term_handler
+from syncdutils import GsyncdError, select, set_term_handler, privileged, update_file
from configinterface import GConffile
import resource
from monitor import monitor
@@ -56,7 +58,43 @@ class GLogger(Logger):
logging.root = cls("root", lvl)
logging.setLoggerClass(cls)
logging.getLogger().handlers = []
- logging.basicConfig(**lprm)
+ logging.getLogger().setLevel(lprm['level'])
+
+ if 'filename' in lprm:
+ try:
+ logging_handler = handlers.WatchedFileHandler(lprm['filename'])
+ formatter = logging.Formatter(fmt=lprm['format'],
+ datefmt=lprm['datefmt'])
+ logging_handler.setFormatter(formatter)
+ logging.getLogger().addHandler(logging_handler)
+ except AttributeError:
+ # Python version < 2.6 will not have WatchedFileHandler
+ # so fallback to logging without any handler.
+ # Note: logrotate will not work if Python version is < 2.6
+ logging.basicConfig(**lprm)
+ else:
+ # If filename not passed(not available in lprm) then it may be
+ # streaming.(Ex: {"stream": "/dev/stdout"})
+ logging.basicConfig(**lprm)
+
+ @classmethod
+ def _gsyncd_loginit(cls, **kw):
+ lkw = {}
+ if gconf.log_level:
+ lkw['level'] = gconf.log_level
+ if kw.get('log_file'):
+ if kw['log_file'] in ('-', '/dev/stderr'):
+ lkw['stream'] = sys.stderr
+ elif kw['log_file'] == '/dev/stdout':
+ lkw['stream'] = sys.stdout
+ else:
+ lkw['filename'] = kw['log_file']
+
+ cls.setup(label=kw.get('label'), **lkw)
+
+ lkw.update({'saved_label': kw.get('label')})
+ gconf.log_metadata = lkw
+ gconf.log_exit = True
def startup(**kw):
"""set up logging, pidfile grabbing, daemonization"""
@@ -88,22 +126,18 @@ def startup(**kw):
select((x,), (), ())
os.close(x)
- lkw = {}
- if gconf.log_level:
- lkw['level'] = gconf.log_level
- if kw.get('log_file'):
- if kw['log_file'] in ('-', '/dev/stderr'):
- lkw['stream'] = sys.stderr
- elif kw['log_file'] == '/dev/stdout':
- lkw['stream'] = sys.stdout
- else:
- lkw['filename'] = kw['log_file']
+ GLogger._gsyncd_loginit(**kw)
+
- GLogger.setup(label=kw.get('label'), **lkw)
+def _unlink(path):
+    """remove @path, treating an already missing file as success
+
+    Any other OSError/IOError is reported as a GsyncdError.
+    """
+    try:
+        os.unlink(path)
+    except (OSError, IOError):
+        failure = sys.exc_info()[1]
+        if failure.errno != ENOENT:
+            raise GsyncdError('Unlink error: %s' % path)
- lkw.update({'saved_label': kw.get('label')})
- gconf.log_metadata = lkw
- gconf.log_exit = True
def main():
"""main routine, signal/exception handling boilerplates"""
@@ -149,33 +183,67 @@ def main_i():
op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs)
op.add_option('--gluster-log-level', metavar='LVL')
op.add_option('--gluster-params', metavar='PRMS', default='')
+ op.add_option('--glusterd-uuid', metavar='UUID', type=str, default='', help=SUPPRESS_HELP)
op.add_option('--gluster-cli-options', metavar='OPTS', default='--log-file=-')
op.add_option('--mountbroker', metavar='LABEL')
op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs)
op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs)
+ op.add_option('--log-file-mbr', metavar='LOGF', type=str, action='callback', callback=store_abs)
op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs)
+ op.add_option('--state-detail-file', metavar='STATF', type=str, action='callback', callback=store_abs)
op.add_option('--ignore-deletes', default=False, action='store_true')
+ op.add_option('--isolated-slave', default=False, action='store_true')
+ op.add_option('--use-rsync-xattrs', default=False, action='store_true')
op.add_option('-L', '--log-level', metavar='LVL')
op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0]))
op.add_option('--volume-id', metavar='UUID')
+ op.add_option('--slave-id', metavar='ID')
op.add_option('--session-owner', metavar='ID')
+ op.add_option('--local-id', metavar='ID', help=SUPPRESS_HELP, default='')
+ op.add_option('--local-path', metavar='PATH', help=SUPPRESS_HELP, default='')
op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh')
op.add_option('--rsync-command', metavar='CMD', default='rsync')
+ op.add_option('--rsync-options', metavar='OPTS', default='')
+ op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress')
op.add_option('--timeout', metavar='SEC', type=int, default=120)
op.add_option('--connection-timeout', metavar='SEC', type=int, default=60, help=SUPPRESS_HELP)
op.add_option('--sync-jobs', metavar='N', type=int, default=3)
op.add_option('--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP)
op.add_option('--allow-network', metavar='IPS', default='')
+ op.add_option('--socketdir', metavar='DIR')
+ op.add_option('--state-socket-unencoded', metavar='SOCKF', type=str, action='callback', callback=store_abs)
+ op.add_option('--checkpoint', metavar='LABEL', default='')
+ # tunables for failover/failback mechanism:
+ # None - gsyncd behaves as normal
+ # blind - gsyncd works with xtime pairs to identify
+ # candidates for synchronization
+ # wrapup - same as normal mode but does not assign
+ # xtimes to orphaned files
+ # see crawl() for usage of the above tunables
+ op.add_option('--special-sync-mode', type=str, help=SUPPRESS_HELP)
+
+ # changelog or xtime? (TODO: Change the default)
+ op.add_option('--change-detector', metavar='MODE', type=str, default='xtime')
+ # sleep interval for change detection (xtime crawl uses a hardcoded 1 second sleep time)
+ op.add_option('--change-interval', metavar='SEC', type=int, default=3)
+ # working directory for changelog based mechanism
+ op.add_option('--working-dir', metavar='DIR', type=str, action='callback', callback=store_abs)
op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local)
# duh. need to specify dest or value will be mapped to None :S
op.add_option('--monitor', dest='monitor', action='callback', callback=store_local_curry(True))
+ op.add_option('--resource-local', dest='resource_local', type=str, action='callback', callback=store_local)
+ op.add_option('--resource-remote', dest='resource_remote', type=str, action='callback', callback=store_local)
op.add_option('--feedback-fd', dest='feedback_fd', type=int, help=SUPPRESS_HELP, action='callback', callback=store_local)
op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True))
op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont'))
+ op.add_option('--verify', type=str, dest="verify", action='callback', callback=store_local)
+ op.add_option('--create', type=str, dest="create", action='callback', callback=store_local)
+ op.add_option('--delete', dest='delete', action='callback', callback=store_local_curry(True))
op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a),
setattr(a[-1].values, 'log_file', '-'),
setattr(a[-1].values, 'log_level', 'DEBUG'))),
+ op.add_option('--path', type=str, action='append')
for a in ('check', 'get'):
op.add_option('--config-' + a, metavar='OPT', type=str, dest='config', action='callback',
@@ -197,7 +265,7 @@ def main_i():
op.add_option('--canonicalize-escape-url', dest='url_print', action='callback', callback=store_local_curry('canon_esc'))
tunables = [ norm(o.get_opt_string()[2:]) for o in op.option_list if o.callback in (store_abs, 'store_true', None) and o.get_opt_string() not in ('--version', '--help') ]
- remote_tunables = [ 'listen', 'go_daemon', 'timeout', 'session_owner', 'config_file' ]
+ remote_tunables = [ 'listen', 'go_daemon', 'timeout', 'session_owner', 'config_file', 'use_rsync_xattrs' ]
rq_remote_tunables = { 'listen': True }
# precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults
@@ -206,6 +274,19 @@ def main_i():
# values container.
defaults = op.get_default_values()
opts, args = op.parse_args(values=optparse.Values())
+ args_orig = args[:]
+ r = rconf.get('resource_local')
+ if r:
+ if len(args) == 0:
+ args.append(None)
+ args[0] = r
+ r = rconf.get('resource_remote')
+ if r:
+ if len(args) == 0:
+ raise GsyncdError('local resource unspecfied')
+ elif len(args) == 1:
+ args.append(None)
+ args[1] = r
confdata = rconf.get('config')
if not (len(args) == 2 or \
(len(args) == 1 and rconf.get('listen')) or \
@@ -215,6 +296,12 @@ def main_i():
sys.stderr.write(op.get_usage() + "\n")
sys.exit(1)
+ verify = rconf.get('verify')
+ if verify:
+ logging.info (verify)
+ logging.info ("Able to spawn gsyncd.py")
+ return
+
restricted = os.getenv('_GSYNCD_RESTRICTED_')
if restricted:
@@ -231,6 +318,17 @@ def main_i():
(k, v))
confrx = getattr(confdata, 'rx', None)
+ def makersc(aa, check=True):
+ if not aa:
+ return ([], None, None)
+ ra = [resource.parse_url(u) for u in aa]
+ local = ra[0]
+ remote = None
+ if len(ra) > 1:
+ remote = ra[1]
+ if check and not local.can_connect_to(remote):
+ raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path))
+ return (ra, local, remote)
if confrx:
# peers are regexen, don't try to parse them
if confrx == 'glob':
@@ -238,27 +336,20 @@ def main_i():
canon_peers = args
namedict = {}
else:
- rscs = [resource.parse_url(u) for u in args]
dc = rconf.get('url_print')
+ rscs, local, remote = makersc(args_orig, not dc)
if dc:
for r in rscs:
print(r.get_url(**{'normal': {},
'canon': {'canonical': True},
'canon_esc': {'canonical': True, 'escaped': True}}[dc]))
return
- local = remote = None
- if rscs:
- local = rscs[0]
- if len(rscs) > 1:
- remote = rscs[1]
- if not local.can_connect_to(remote):
- raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path))
pa = ([], [], [])
urlprms = ({}, {'canonical': True}, {'canonical': True, 'escaped': True})
for x in rscs:
for i in range(len(pa)):
pa[i].append(x.get_url(**urlprms[i]))
- peers, canon_peers, canon_esc_peers = pa
+ _, canon_peers, canon_esc_peers = pa
# creating the namedict, a dict representing various ways of referring to / repreenting
# peers to be fillable in config templates
mods = (lambda x: x, lambda x: x[0].upper() + x[1:], lambda x: 'e' + x[0].upper() + x[1:])
@@ -272,12 +363,14 @@ def main_i():
for name in rmap[x]:
for j in range(3):
namedict[mods[j](name)] = pa[j][i]
- if x.scheme == 'gluster':
- namedict[name + 'vol'] = x.volume
+ namedict[name + 'vol'] = x.volume
+ if name == 'remote':
+ namedict['remotehost'] = x.remotehost
if not 'config_file' in rconf:
- rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd.conf")
+ rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd_template.conf")
gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict)
+ checkpoint_change = False
if confdata:
opt_ok = norm(confdata.opt) in tunables + [None]
if confdata.op == 'check':
@@ -293,13 +386,55 @@ def main_i():
gcnf.set(confdata.opt, confdata.val, confdata.rx)
elif confdata.op == 'del':
gcnf.delete(confdata.opt, confdata.rx)
- return
+ # when modifying checkpoint, it's important to make a log
+ # of that, so in that case we go on to set up logging even
+ # if its just config invocation
+ if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \
+ not confdata.rx:
+ checkpoint_change = True
+ if not checkpoint_change:
+ return
gconf.__dict__.update(defaults.__dict__)
gcnf.update_to(gconf.__dict__)
gconf.__dict__.update(opts.__dict__)
gconf.configinterface = gcnf
+ delete = rconf.get('delete')
+ if delete:
+ logging.info ('geo-replication delete')
+ # Delete pid file, status file, socket file
+ cleanup_paths = []
+ if getattr(gconf, 'pid_file', None):
+ cleanup_paths.append(gconf.pid_file)
+
+ if getattr(gconf, 'state_file', None):
+ cleanup_paths.append(gconf.state_file)
+
+ if getattr(gconf, 'state_detail_file', None):
+ cleanup_paths.append(gconf.state_detail_file)
+
+ if getattr(gconf, 'state_socket_unencoded', None):
+ cleanup_paths.append(gconf.state_socket_unencoded)
+
+ cleanup_paths.append(rconf['config_file'][:-11] + "*");
+
+ # Cleanup changelog working dirs
+ if getattr(gconf, 'working_dir', None):
+ try:
+ shutil.rmtree(gconf.working_dir)
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError('Error while removing working dir: %s' % gconf.working_dir)
+
+ for path in cleanup_paths:
+ # To delete temp files
+ for f in glob.glob(path + "*"):
+ _unlink(f)
+ return
+
if restricted and gconf.allow_network:
ssh_conn = os.getenv('SSH_CONNECTION')
if not ssh_conn:
@@ -331,9 +466,38 @@ def main_i():
raise GsyncdError('cannot recognize log level "%s"' % lvl0)
gconf.log_level = lvl2
+ if not privileged() and gconf.log_file_mbr:
+ gconf.log_file = gconf.log_file_mbr
+
+ if checkpoint_change:
+ try:
+ GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
+ if confdata.op == 'set':
+ logging.info('checkpoint %s set' % confdata.val)
+ elif confdata.op == 'del':
+ logging.info('checkpoint info was reset')
+ except IOError:
+ if sys.exc_info()[1].errno == ENOENT:
+ # directory of log path is not present,
+ # which happens if we get here from
+ # a peer-multiplexed "config-set checkpoint"
+ # (as that directory is created only on the
+ # original node)
+ pass
+ else:
+ raise
+ return
+
+ create = rconf.get('create')
+ if create:
+ if getattr(gconf, 'state_file', None):
+ update_file(gconf.state_file, lambda f: f.write(create + '\n'))
+ return
+
go_daemon = rconf['go_daemon']
be_monitor = rconf.get('monitor')
+ rscs, local, remote = makersc(args)
if not be_monitor and isinstance(remote, resource.SSH) and \
go_daemon == 'should':
go_daemon = 'postconn'
@@ -344,16 +508,16 @@ def main_i():
label = 'monitor'
elif remote:
#master
- label = ''
+ label = gconf.local_path
else:
label = 'slave'
startup(go_daemon=go_daemon, log_file=log_file, label=label)
+ resource.Popen.init_errhandler()
if be_monitor:
- return monitor()
+ return monitor(*rscs)
- logging.info("syncing: %s" % " -> ".join(peers))
- resource.Popen.init_errhandler()
+ logging.info("syncing: %s" % " -> ".join(r.url for r in rscs))
if remote:
go_daemon = remote.connect_remote(go_daemon=go_daemon)
if go_daemon:
diff --git a/xlators/features/marker/utils/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py
index f0a9d2292..b5b6956ae 100644
--- a/xlators/features/marker/utils/syncdaemon/libcxattr.py
+++ b/geo-replication/syncdaemon/libcxattr.py
@@ -43,6 +43,16 @@ class Xattr(object):
return cls._query_xattr( path, siz, 'lgetxattr', attr)
@classmethod
+    def lgetxattr_buf(cls, path, attr):
+        """lgetxattr with size discovery
+
+        A first lgetxattr() call without a size asks how large the
+        value is; a second call fetches it with that size. Returns ''
+        for a zero sized value, and goes through raise_oserr() when the
+        size query yields -1.
+        """
+        needed = cls.lgetxattr(path, attr)
+        if needed == -1:
+            cls.raise_oserr()
+        if needed != 0:
+            return cls.lgetxattr(path, attr, needed)
+        return ''
+
+ @classmethod
def llistxattr(cls, path, siz=0):
ret = cls._query_xattr(path, siz, 'llistxattr')
if isinstance(ret, str):
@@ -56,6 +66,11 @@ class Xattr(object):
cls.raise_oserr()
@classmethod
+ def lsetxattr_l(cls, path, attr, val):
+ """lazy lsetxattr(): the libc return value is not checked
+
+ Nothing is raised or returned here on failure; the caller
+ handles errno.
+ """
+ cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ @classmethod
def lremovexattr(cls, path, attr):
ret = cls.libc.lremovexattr(path, attr)
if ret == -1:
diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py
new file mode 100644
index 000000000..68ec3baf1
--- /dev/null
+++ b/geo-replication/syncdaemon/libgfchangelog.py
@@ -0,0 +1,64 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Changes(object):
+    """ctypes wrapper around the "gfchangelog" shared library
+
+    Each cl_* wrapper goes through raise_oserr() (OSError built from
+    the ctypes errno) when the wrapped call returns -1.
+    """
+
+    libgfc = CDLL(find_library("gfchangelog"), use_errno=True)
+
+    # size of the buffer handed to gf_changelog_next_change()
+    # (TODO: remove hardcoding for path name length)
+    PATH_BUF_SIZE = 4096
+
+    @classmethod
+    def geterrno(cls):
+        """errno value as tracked by ctypes"""
+        return get_errno()
+
+    @classmethod
+    def raise_oserr(cls):
+        """raise OSError for the current errno"""
+        errn = cls.geterrno()
+        raise OSError(errn, os.strerror(errn))
+
+    @classmethod
+    def _get_api(cls, call):
+        """look up the library function named @call"""
+        return getattr(cls.libgfc, call)
+
+    @classmethod
+    def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+        """call gf_changelog_register() with the given arguments"""
+        ret = cls._get_api('gf_changelog_register')(brick, path,
+                                                    log_file, log_level, retries)
+        if ret == -1:
+            cls.raise_oserr()
+
+    @classmethod
+    def cl_scan(cls):
+        """call gf_changelog_scan()"""
+        ret = cls._get_api('gf_changelog_scan')()
+        if ret == -1:
+            cls.raise_oserr()
+
+    @classmethod
+    def cl_startfresh(cls):
+        """call gf_changelog_start_fresh()"""
+        ret = cls._get_api('gf_changelog_start_fresh')()
+        if ret == -1:
+            cls.raise_oserr()
+
+    @classmethod
+    def cl_getchanges(cls):
+        """collect everything gf_changelog_next_change() has to offer
+
+        Entries are fetched until the call returns 0 or -1 (the latter
+        raising OSError). The tracker is then reset via cl_startfresh()
+        and the entries are returned sorted by the text following
+        their last '.'.
+        """
+        def clsort(f):
+            return f.split('.')[-1]
+        changes = []
+        buf = create_string_buffer('\0', cls.PATH_BUF_SIZE)
+        call = cls._get_api('gf_changelog_next_change')
+
+        while True:
+            ret = call(buf, cls.PATH_BUF_SIZE)
+            if ret in (0, -1):
+                break
+            # presumably ret counts a trailing NUL byte, hence the -1
+            changes.append(buf.raw[:ret-1])
+        if ret == -1:
+            cls.raise_oserr()
+        # cleanup tracker
+        cls.cl_startfresh()
+        return sorted(changes, key=clsort)
+
+    @classmethod
+    def cl_done(cls, clfile):
+        """call gf_changelog_done() for @clfile"""
+        ret = cls._get_api('gf_changelog_done')(clfile)
+        if ret == -1:
+            cls.raise_oserr()
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
new file mode 100644
index 000000000..95810a61e
--- /dev/null
+++ b/geo-replication/syncdaemon/master.py
@@ -0,0 +1,1022 @@
+import os
+import sys
+import time
+import stat
+import random
+import signal
+import json
+import logging
+import socket
+import string
+import errno
+from shutil import copyfileobj
+from errno import ENOENT, ENODATA, EPIPE, EEXIST
+from threading import currentThread, Condition, Lock
+from datetime import datetime
+
+from gconf import gconf
+from tempfile import mkdtemp, NamedTemporaryFile
+from syncdutils import FreeObject, Thread, GsyncdError, boolify, escape, \
+ unescape, select, gauxpfx, md5hex, selfkill, entry2pb, \
+ lstat, errno_wrap
+
+URXTIME = (-1, 0)
+
+# Utility functions to help us to get to closer proximity
+# of the DRY principle (no, don't look for elevated or
+# perspectivistic things here)
+
+def _xtime_now():
+    """current time as a pair of integers
+
+    The first member is the whole seconds of time.time(), the second
+    one is the sub-second remainder multiplied by 1000000.
+    """
+    now = time.time()
+    whole = int(now)
+    return (whole, int((now - whole) * 1000000))
+
+def _volinfo_hook_relax_foreign(self):
+    """volinfo hook that waits out a foreign volume info
+
+    If get_sys_volinfo() reports a foreign volume info, sleep until
+    its 'timeout' has passed and query again. Returns the latest
+    result of get_sys_volinfo().
+    """
+    volinfo_sys = self.get_sys_volinfo()
+    fgn_vi = volinfo_sys[self.KFGN]
+    if fgn_vi:
+        # a timeout that has already elapsed would give a negative
+        # value, which time.sleep() rejects -- clamp it to zero
+        expiry = max(0, fgn_vi['timeout'] - int(time.time()) + 1)
+        logging.info('foreign volume info found, waiting %d sec for expiry' % \
+                     expiry)
+        time.sleep(expiry)
+        volinfo_sys = self.get_sys_volinfo()
+    return volinfo_sys
+
+
+# The API!
+
+def gmaster_builder(excrawl=None):
+    """assemble the GMaster class variant for the configured sync mode
+
+    The class is composed of four mixins looked up or picked here:
+    - the crawl mixin, named after @excrawl if that is a non-empty
+      string, else after gconf.change_detector;
+    - the mode mixin, named after gconf.special_sync_mode
+      ('normal' if that is unset);
+    - a sendmark mixin, depending on gconf.use_rsync_xattrs;
+    - a purge mixin, depending on gconf.ignore_deletes.
+    """
+    module = sys.modules[__name__]
+    mode = gconf.special_sync_mode or 'normal'
+    if isinstance(excrawl, str) and excrawl:
+        detector = excrawl
+    else:
+        detector = gconf.change_detector
+    logging.info('setting up %s change detection mode' % detector)
+    modemixin = getattr(module, mode.capitalize() + 'Mixin')
+    crawlmixin = getattr(module, 'GMaster' + detector.capitalize() + 'Mixin')
+    if boolify(gconf.use_rsync_xattrs):
+        sendmarkmixin = SendmarkRsyncMixin
+    else:
+        sendmarkmixin = SendmarkNormalMixin
+    if boolify(gconf.ignore_deletes):
+        purgemixin = PurgeNoopMixin
+    else:
+        purgemixin = PurgeNormalMixin
+    class _GMaster(crawlmixin, modemixin, sendmarkmixin, purgemixin):
+        pass
+    return _GMaster
+
+
+# Mixin classes that implement the data format
+# and logic particularities of the certain
+# sync modes
+
+class NormalMixin(object):
+ """normal geo-rep behavior"""
+
+ minus_infinity = URXTIME
+
+ # following staticmethods ideally would be
+ # methods of an xtime object (in particular,
+ # implementing the hooks needed for comparison
+ # operators), but at this point we don't yet
+ # have a dedicated xtime class
+
+ @staticmethod
+ def serialize_xtime(xt):
+ return "%d.%d" % tuple(xt)
+
+ @staticmethod
+ def deserialize_xtime(xt):
+ return tuple(int(x) for x in xt.split("."))
+
+ @staticmethod
+ def native_xtime(xt):
+ return xt
+
+ @staticmethod
+ def xtime_geq(xt0, xt1):
+ return xt0 >= xt1
+
+ def make_xtime_opts(self, is_master, opts):
+ if not 'create' in opts:
+ opts['create'] = is_master
+ if not 'default_xtime' in opts:
+ opts['default_xtime'] = URXTIME
+
+ def xtime_low(self, server, path, **opts):
+ xt = server.xtime(path, self.uuid)
+ if isinstance(xt, int) and xt != ENODATA:
+ return xt
+ if xt == ENODATA or xt < self.volmark:
+ if opts['create']:
+ xt = _xtime_now()
+ server.aggregated.set_xtime(path, self.uuid, xt)
+ else:
+ xt = opts['default_xtime']
+ return xt
+
+ def keepalive_payload_hook(self, timo, gap):
+ # first grab a reference as self.volinfo
+ # can be changed in main thread
+ vi = self.volinfo
+ if vi:
+ # then have a private copy which we can mod
+ vi = vi.copy()
+ vi['timeout'] = int(time.time()) + timo
+ else:
+ # send keep-alives more frequently to
+ # avoid a delay in announcing our volume info
+ # to slave if it becomes established in the
+ # meantime
+ gap = min(10, gap)
+ return (vi, gap)
+
+ def volinfo_hook(self):
+ return self.get_sys_volinfo()
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ if xtr > xtl:
+ raise GsyncdError("timestamp corruption for " + path)
+
+ def need_sync(self, e, xte, xtrd):
+ return xte > xtrd
+
+ def set_slave_xtime(self, path, mark):
+ self.slave.server.set_xtime(path, self.uuid, mark)
+ self.slave.server.set_xtime_remote(path, self.uuid, mark)
+
+class PartialMixin(NormalMixin):
+ """a variant tuned towards operation with a master
+ that has partial info of the slave (brick typically)"""
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ pass
+
+class RecoverMixin(NormalMixin):
+ """a variant that differs from normal in terms
+ of ignoring non-indexed files"""
+
+ @staticmethod
+ def make_xtime_opts(is_master, opts):
+ if not 'create' in opts:
+ opts['create'] = False
+ if not 'default_xtime' in opts:
+ opts['default_xtime'] = URXTIME
+
+ def keepalive_payload_hook(self, timo, gap):
+ return (None, gap)
+
+ def volinfo_hook(self):
+ return _volinfo_hook_relax_foreign(self)
+
+# Further mixins for certain tunable behaviors
+
+class SendmarkNormalMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ return self.sendmark(*a, **kw)
+
+class SendmarkRsyncMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ pass
+
+
+class PurgeNormalMixin(object):
+
+ def purge_missing(self, path, names):
+ self.slave.server.purge(path, names)
+
+class PurgeNoopMixin(object):
+
+ def purge_missing(self, path, names):
+ pass
+
+class GMasterCommon(object):
+ """abstract class impementling master role"""
+
+ KFGN = 0
+ KNAT = 1
+
+ def get_sys_volinfo(self):
+ """query volume marks on fs root
+
+ err out on multiple foreign masters
+ """
+ fgn_vis, nat_vi = self.master.server.aggregated.foreign_volume_infos(), \
+ self.master.server.aggregated.native_volume_info()
+ fgn_vi = None
+ if fgn_vis:
+ if len(fgn_vis) > 1:
+ raise GsyncdError("cannot work with multiple foreign masters")
+ fgn_vi = fgn_vis[0]
+ return fgn_vi, nat_vi
+
+ @property
+ def uuid(self):
+ if self.volinfo:
+ return self.volinfo['uuid']
+
+ @property
+ def volmark(self):
+ if self.volinfo:
+ return self.volinfo['volume_mark']
+
+ def xtime(self, path, *a, **opts):
+ """get amended xtime
+
+ as of amending, we can create missing xtime, or
+ determine a valid value if what we get is expired
+ (as of the volume mark expiry); way of amending
+ depends on @opts and on subject of query (master
+ or slave).
+
+ The first extra positional argument, if given, is the
+ resource to query; otherwise the master is queried.
+ """
+ if a:
+ rsc = a[0]
+ else:
+ rsc = self.master
+ # fill in the defaults of @opts, depending on whether the master is queried
+ self.make_xtime_opts(rsc == self.master, opts)
+ return self.xtime_low(rsc.server, path, **opts)
+
+    def get_initial_crawl_data(self):
+        """load the persisted crawl statistics
+
+        Returns the parsed content of gconf.state_detail_file if that
+        option is set and the file can be read. If the file does not
+        exist, it is created with zeroed counters and those are
+        returned; other I/O errors propagate. The zeroed counters are
+        also returned when no state detail file is configured.
+        """
+        # while persisting only 'files_syncd' is non-zero, rest of
+        # the stats are nulls. lets keep it that way in case they
+        # are needed to be used some day...
+        default_data = {'files_syncd': 0,
+                        'files_remaining': 0,
+                        'bytes_remaining': 0,
+                        'purges_remaining': 0}
+        if getattr(gconf, 'state_detail_file', None):
+            try:
+                # "with" closes the handle even if parsing fails
+                with open(gconf.state_detail_file) as f:
+                    return json.load(f)
+            except (IOError, OSError):
+                ex = sys.exc_info()[1]
+                if ex.errno == ENOENT:
+                    # Create file with initial data
+                    with open(gconf.state_detail_file, 'wb') as f:
+                        json.dump(default_data, f)
+                    return default_data
+                else:
+                    raise
+        return default_data
+
+    def update_crawl_data(self):
+        """persist self.total_crawl_stats to gconf.state_detail_file
+
+        The stats are dumped to a temporary file in the directory of
+        the state detail file, which is then renamed over it. Nothing
+        is done if no state detail file is configured; I/O errors
+        propagate to the caller.
+        """
+        if getattr(gconf, 'state_detail_file', None):
+            same_dir = os.path.dirname(gconf.state_detail_file)
+            with NamedTemporaryFile(dir=same_dir, delete=False) as tmp:
+                json.dump(self.total_crawl_stats, tmp)
+            # rename only after the temporary file has been closed, so
+            # that its content is flushed before it replaces the old one
+            os.rename(tmp.name, gconf.state_detail_file)
+
+    def __init__(self, master, slave):
+        """set up the bookkeeping for syncing @master to @slave"""
+        self.master = master
+        self.slave = slave
+        # path -> list of (label, args, thunk) job entries,
+        # filled by .add_job and consumed by .wait
+        self.jobtab = {}
+        self.syncer = Syncer(slave)
+        # crawls vs. turns:
+        # - self.crawls is simply the number of crawl() invocations on root
+        # - one turn is a maximal consecutive sequence of crawls so that each
+        #   crawl in it detects a change to be synced
+        # - self.turns is the number of turns since start
+        # - self.total_turns is a limit so that if self.turns reaches it, then
+        #   we exit (for diagnostic purposes)
+        # so, eg., if the master fs changes unceasingly, self.turns will remain 0.
+        self.crawls = 0
+        self.turns = 0
+        self.total_turns = int(gconf.turns)
+        # reference point of the uptime reported by .get_extra_info
+        self.crawl_start = datetime.now()
+        # snapshot taken at the last periodic report in .crawlwrap
+        self.lastreport = {'crawls': 0, 'turns': 0, 'time': 0}
+        # loaded by .crawlwrap through .get_initial_crawl_data
+        self.total_crawl_stats = None
+        # start time of the current crawl, set in .crawlwrap
+        self.start = None
+        self.change_seen = None
+        # the actual volinfo we make use of
+        self.volinfo = None
+        # .crawlwrap keeps looping until this becomes true
+        self.terminate = False
+        # seconds slept between crawls
+        self.sleep_interval = 1
+        # set by .start_checkpoint_thread once the service is running
+        self.checkpoint_thread = None
+
+    def init_keep_alive(cls):
+        """start the keep-alive thread
+
+        Nothing is started unless gconf.timeout is a positive number.
+        """
+        timo = int(gconf.timeout or 0)
+        if timo <= 0:
+            return
+        def keep_alive():
+            # endless loop: fetch the payload and the pause length from
+            # the hook, send the payload to the slave, then pause
+            while True:
+                vi, gap = cls.keepalive_payload_hook(timo, timo * 0.5)
+                cls.slave.server.keep_alive(vi)
+                time.sleep(gap)
+        Thread(target=keep_alive).start()
+
+    def should_crawl(cls):
+        """tell if gconf.glusterd_uuid is among what master.server.node_uuid() reports"""
+        local_uuid = gconf.glusterd_uuid
+        return local_uuid in cls.master.server.node_uuid()
+
+    def register(self):
+        """hook for crawl mixins to set themselves up before crawling
+
+        Mixins in this file override it. The base version used to call
+        itself, i.e. recursed until the interpreter gave up; fail
+        right away with a meaningful error instead.
+        """
+        raise NotImplementedError("register() must be provided by a crawl mixin")
+
+    def crawlwrap(self, oneshot=False):
+        """fetch the volume info, then crawl until told to terminate
+
+        With @oneshot set the keep-alive thread is started first and
+        the method returns after the first crawl that was carried out.
+
+        Raises GsyncdError if the master volinfo is unavailable or
+        carries a non-zero 'retval'.
+        """
+        if oneshot:
+            # it's important to do this during the oneshot crawl as
+            # for a passive gsyncd (ie. in a replicate scenario)
+            # the keepalive thread would keep the connection alive.
+            self.init_keep_alive()
+
+        # no need to maintain volinfo state machine.
+        # in a cascading setup, each geo-replication session is
+        # independent (ie. 'volume-mark' and 'xtime' are not
+        # propagated). This is because the slave's xtime is now
+        # stored on the master itself. 'volume-mark' just identifies
+        # that we are in a cascading setup and need to enable
+        # 'geo-replication.ignore-pid-check' option.
+        volinfo_sys = self.volinfo_hook()
+        self.volinfo = volinfo_sys[self.KNAT]
+        inter_master = volinfo_sys[self.KFGN]
+        # NOTE(review): the uuid is logged and stored before volinfo is
+        # validated below, so it is None when volinfo is unavailable
+        logging.info("%s master with volume id %s ..." % \
+                     (inter_master and "intermediate" or "primary",
+                      self.uuid))
+        gconf.configinterface.set('volume_id', self.uuid)
+        if self.volinfo:
+            if self.volinfo['retval']:
+                raise GsyncdError("master is corrupt")
+            self.start_checkpoint_thread()
+        else:
+            raise GsyncdError("master volinfo unavailable")
+        self.total_crawl_stats = self.get_initial_crawl_data()
+        self.lastreport['time'] = time.time()
+        logging.info('crawl interval: %d seconds' % self.sleep_interval)
+
+        # t0 is the time .should_crawl was last consulted
+        t0 = time.time()
+        crawl = self.should_crawl()
+        while not self.terminate:
+            if self.start:
+                logging.debug("... crawl #%d done, took %.6f seconds" % \
+                              (self.crawls, time.time() - self.start))
+            self.start = time.time()
+            # report crawl/turn counts relative to the previous report
+            # once 60 seconds have passed since it
+            should_display_info = self.start - self.lastreport['time'] >= 60
+            if should_display_info:
+                logging.info("%d crawls, %d turns",
+                             self.crawls - self.lastreport['crawls'],
+                             self.turns - self.lastreport['turns'])
+                self.lastreport.update(crawls = self.crawls,
+                                       turns = self.turns,
+                                       time = self.start)
+            t1 = time.time()
+            if int(t1 - t0) >= 60: #lets hardcode this check to 60 seconds
+                crawl = self.should_crawl()
+                t0 = t1
+            if not crawl:
+                # not our turn: idle, the decision is refreshed above
+                time.sleep(5)
+                continue
+            self.crawl()
+            if oneshot:
+                return
+            time.sleep(self.sleep_interval)
+
+    @classmethod
+    def _checkpt_param(cls, chkpt, prm, xtimish=True):
+        """use config backend to lookup a parameter belonging to
+        checkpoint @chkpt
+
+        The stored form is "<escaped checkpoint>:<value>". None is
+        returned if nothing is stored for @prm or if what is stored
+        belongs to another checkpoint. With @xtimish set the value is
+        passed through .deserialize_xtime before being returned.
+        """
+        stored = getattr(gconf, 'checkpoint_' + prm, None)
+        if not stored:
+            return
+        owner, val = stored.split(':', 1)
+        if unescape(owner) != chkpt:
+            return
+        return cls.deserialize_xtime(val) if xtimish else val
+
+    @classmethod
+    def _set_checkpt_param(cls, chkpt, prm, val, xtimish=True):
+        """use config backend to store a parameter associated
+        with checkpoint @chkpt
+
+        The value is stored as "<escaped checkpoint>:<value>"; with
+        @xtimish set it is passed through .serialize_xtime first.
+        """
+        stored_val = cls.serialize_xtime(val) if xtimish else val
+        record = "%s:%s" % (escape(chkpt), stored_val)
+        gconf.configinterface.set('checkpoint_' + prm, record)
+
+    @staticmethod
+    def humantime(*tpair):
+        """format xtime-like (sec, nsec) pair to human readable format
+
+        The second component, if given, is appended verbatim after a dot.
+        """
+        stamp = float('.'.join([str(n) for n in tpair]))
+        text = datetime.fromtimestamp(stamp).strftime("%Y-%m-%d %H:%M:%S")
+        if len(tpair) > 1:
+            text = text + '.' + str(tpair[1])
+        return text
+
+    def get_extra_info(self):
+        """build the NUL terminated statistics line sent by the checkpoint service
+
+        It carries the uptime since self.crawl_start and the counters
+        of self.total_crawl_stats; the line is also logged at debug level.
+        """
+        stats = self.total_crawl_stats
+        uptime = self._crawl_time_format(datetime.now() - self.crawl_start)
+        str_info = '\nUptime=%s;FilesSyncd=%d;FilesPending=%d;BytesPending=%d;DeletesPending=%d;' % (
+            uptime,
+            stats['files_syncd'],
+            stats['files_remaining'],
+            stats['bytes_remaining'],
+            stats['purges_remaining'])
+        str_info = str_info + '\0'
+        logging.debug(str_info)
+        return str_info
+
+    def _crawl_time_format(self, crawl_time):
+        """format the timedelta @crawl_time for display
+
+        Years and days are shown only when non-zero and are followed
+        by zero padded hours:minutes:seconds.
+        """
+        # Ex: 5 years, 4 days, 20:23:10
+        years, days = divmod(crawl_time.days, 365.25)
+        years = int(years)
+        days = int(days)
+
+        date=""
+        m, s = divmod(crawl_time.seconds, 60)
+        h, m = divmod(m, 60)
+
+        if years != 0:
+            date += "%s %s " % (years, "year" if years == 1 else "years")
+        if days != 0:
+            date += "%s %s " % (days, "day" if days == 1 else "days")
+
+        # %02d pads exactly like string.zfill(n, 2) did, without relying
+        # on a function that exists in the Python 2 string module only
+        date += "%02d:%02d:%02d" % (h, m, s)
+        return date
+
+    def checkpt_service(self, chan, chkpt, tgt):
+        """checkpoint service loop
+
+        monitor and verify checkpoint status for @chkpt, and listen
+        for incoming requests for whom we serve a pretty-formatted
+        status report
+
+        @chan is the listening socket, @tgt the checkpoint target
+        xtime. The loop never returns; GsyncdError is raised if the
+        slave root xtime cannot be fetched.
+        """
+        if not chkpt:
+            # dummy loop for the case when there is no checkpt set
+            while True:
+                select([chan], [], [])
+                conn, _ = chan.accept()
+                conn.send(self.get_extra_info())
+                conn.close()
+        completed = self._checkpt_param(chkpt, 'completed', xtimish=False)
+        if completed:
+            completed = tuple(int(x) for x in completed.split('.'))
+        while True:
+            # poll every 5 seconds while hunting for completion, block
+            # until a request arrives once completion has been recorded
+            s,_,_ = select([chan], [], [], (not completed) and 5 or None)
+            # either request made and we re-check to not
+            # give back stale data, or we still hunting for completion
+            if self.native_xtime(tgt) and self.native_xtime(tgt) < self.volmark:
+                # indexing has been reset since setting the checkpoint
+                status = "is invalid"
+            else:
+                xtr = self.xtime('.', self.slave)
+                if isinstance(xtr, int):
+                    # interpolate here: the exception does not format
+                    # its arguments, the message would keep a raw "%s"
+                    raise GsyncdError("slave root directory is unaccessible (%s)" %
+                                      os.strerror(xtr))
+                ncompleted = self.xtime_geq(xtr, tgt)
+                if completed and not ncompleted: # stale data
+                    logging.warn("completion time %s for checkpoint %s became stale" % \
+                                 (self.humantime(*completed), chkpt))
+                    completed = None
+                    gconf.confdata.delete('checkpoint-completed')
+                if ncompleted and not completed: # just reaching completion
+                    completed = "%.6f" % time.time()
+                    self._set_checkpt_param(chkpt, 'completed', completed, xtimish=False)
+                    completed = tuple(int(x) for x in completed.split('.'))
+                    logging.info("checkpoint %s completed" % chkpt)
+                status = completed and \
+                  "completed at " + self.humantime(completed[0]) or \
+                  "not reached yet"
+            if s:
+                conn = None
+                try:
+                    conn, _ = chan.accept()
+                    try:
+                        conn.send("  | checkpoint %s %s %s" % (chkpt, status, self.get_extra_info()))
+                    except:
+                        exc = sys.exc_info()[1]
+                        # a client that went away is not an error
+                        if (isinstance(exc, OSError) or isinstance(exc, IOError)) and \
+                           exc.errno == EPIPE:
+                            logging.debug('checkpoint client disconnected')
+                        else:
+                            raise
+                finally:
+                    if conn:
+                        conn.close()
+
+    def start_checkpoint_thread(self):
+        """prepare and start checkpoint service
+
+        Does nothing if the service is already running or if
+        gconf.state_socket_unencoded / gconf.socketdir are not set.
+        Raises GsyncdError if a checkpoint target has to be determined
+        and the master root xtime cannot be fetched.
+        """
+        if self.checkpoint_thread or not (
+          getattr(gconf, 'state_socket_unencoded', None) and getattr(gconf, 'socketdir', None)
+        ):
+            return
+        chan = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        state_socket = os.path.join(gconf.socketdir, md5hex(gconf.state_socket_unencoded) + ".socket")
+        try:
+            os.unlink(state_socket)
+        except OSError:
+            # best effort removal of a stale socket. Only OSError is
+            # tolerated: the former bare except discarded every other
+            # exception as well.
+            pass
+        chan.bind(state_socket)
+        chan.listen(1)
+        checkpt_tgt = None
+        if gconf.checkpoint:
+            checkpt_tgt = self._checkpt_param(gconf.checkpoint, 'target')
+            if not checkpt_tgt:
+                # no target stored yet: take the current master root xtime
+                checkpt_tgt = self.xtime('.')
+                if isinstance(checkpt_tgt, int):
+                    # interpolate here: the exception does not format
+                    # its arguments, the message would keep a raw "%s"
+                    raise GsyncdError("master root directory is unaccessible (%s)" %
+                                      os.strerror(checkpt_tgt))
+                self._set_checkpt_param(gconf.checkpoint, 'target', checkpt_tgt)
+            logging.debug("checkpoint target %s has been determined for checkpoint %s" % \
+                          (repr(checkpt_tgt), gconf.checkpoint))
+        t = Thread(target=self.checkpt_service, args=(chan, gconf.checkpoint, checkpt_tgt))
+        t.start()
+        self.checkpoint_thread = t
+
+    def add_job(self, path, label, job, *a, **kw):
+        """insert @job function to job table at @path with @label
+
+        The entry stored is (label, positional args, thunk), where
+        calling the thunk invokes @job with @a and @kw.
+        """
+        thunk = lambda : job(*a, **kw)
+        self.jobtab.setdefault(path, []).append((label, a, thunk))
+
+    def add_failjob(self, path, label):
+        """invoke .add_job with a job that does nothing just fails"""
+        logging.debug('salvaged: ' + label)
+        def fail():
+            return False
+        self.add_job(path, label, fail)
+
+    def wait(self, path, *args):
+        """perform jobs registered for @path
+
+        Reset jobtab entry for @path,
+        determine success as the conjunction of
+        success of all the jobs. In case of
+        success, call .sendmark on @path
+        (unless no mark was passed or the mark is None).
+
+        Returns whether all the jobs succeeded.
+        """
+        jobs = self.jobtab.pop(path, [])
+        succeed = True
+        # every job is run, even after one has failed
+        for j in jobs:
+            if not j[-1]():
+                succeed = False
+        # guard against being called without a mark: args[0] used to
+        # raise IndexError in that case
+        if succeed and args and not args[0] == None:
+            self.sendmark(path, *args)
+        return succeed
+
+    def sendmark(self, path, mark, adct=None):
+        """update slave side xtime for @path to master side xtime
+
+        also can send a setattr payload (see Server.setattr): a
+        non-empty @adct is applied to @path on the slave first.
+        """
+        if adct:
+            slave_server = self.slave.server
+            slave_server.setattr(path, adct)
+        self.set_slave_xtime(path, mark)
+
+class GMasterChangelogMixin(GMasterCommon):
+ """ changelog based change detection and syncing """
+
+ # index for change type and entry
+ IDX_START = 0
+ IDX_END = 2
+
+ POS_GFID = 0
+ POS_TYPE = 1
+ POS_ENTRY1 = 2
+ POS_ENTRY2 = 3 # renames
+
+ _CL_TYPE_DATA_PFX = "D "
+ _CL_TYPE_METADATA_PFX = "M "
+ _CL_TYPE_ENTRY_PFX = "E "
+
+ TYPE_GFID = [_CL_TYPE_DATA_PFX] # ignoring metadata ops
+ TYPE_ENTRY = [_CL_TYPE_ENTRY_PFX]
+
+ # flat directory hierarchy for gfid based access
+ FLAT_DIR_HIERARCHY = '.'
+
+ # maximum retries per changelog before giving up
+ MAX_RETRIES = 10
+
+    def fallback_xsync(self):
+        """switch the configured change detector to xsync and kill ourselves"""
+        logging.info('falling back to xsync mode')
+        # store the new mode through the config interface before going down
+        gconf.configinterface.set('change-detector', 'xsync')
+        # NOTE(review): selfkill presumably ends this process so that a
+        # restarted worker picks up the new mode -- confirm
+        selfkill()
+
+    def setup_working_dir(self):
+        """compute the changelog working directory and its log file
+
+        Returns (workdir, logfile): workdir is the md5 hex digest of
+        gconf.local_path below gconf.working_dir, logfile is
+        'changes.log' inside it. Nothing is created here.
+        """
+        workdir = os.path.join(gconf.working_dir, md5hex(gconf.local_path))
+        logfile = os.path.join(workdir, 'changes.log')
+        paths = (workdir, logfile)
+        logging.debug('changelog working dir %s (log: %s)' % paths)
+        return paths
+
+ # update stats from *this* crawl
+    def update_cumulative_stats(self, files_pending):
+        """copy the pending counters of this crawl into self.total_crawl_stats"""
+        stats = self.total_crawl_stats
+        for stat_key, pending_key in (('files_remaining', 'count'),
+                                      ('bytes_remaining', 'bytes'),
+                                      ('purges_remaining', 'purge')):
+            stats[stat_key] = files_pending[pending_key]
+
+ # sync data
+    def syncdata(self, datas):
+        """hand every item of @datas to the syncer and wait for the outcome
+
+        Returns True if all the sync jobs succeeded, None otherwise.
+        """
+        logging.debug('datas: %s' % (datas))
+        for data in datas:
+            logging.debug('candidate for syncing %s' % data)
+            # the syncer gives back the PostBox the item was posted to
+            pb = self.syncer.add(data)
+            def regjob(se, xte, pb):
+                # blocks until the worker publishes its (success, code) pair
+                rv = pb.wait()
+                if rv[0]:
+                    logging.debug('synced ' + se)
+                    return True
+                else:
+                    # 23 and 24 are the codes Syncer.syncjob passes on
+                    # for a partial rsync transfer
+                    if rv[1] in [23, 24]:
+                        # stat to check if the file exist
+                        st = lstat(se)
+                        if isinstance(st, int):
+                            # file got unlinked in the interim
+                            return True
+                    # falling off the end returns None, ie. failure
+                    logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1]))
+            self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, data, None, pb)
+        # run the jobs; the None mark keeps .wait from sending a mark
+        if self.wait(self.FLAT_DIR_HIERARCHY, None):
+            return True
+
+    def process_change(self, change, done, retry):
+        """replay the changelog file @change on the slave
+
+        Entry ("E ") records are turned into entry operations sent to
+        the slave, data ("D ") records into the set of gfid paths whose
+        content gets synced. With @done set the changelog is reported
+        as done to the master once data sync succeeded. On a @retry the
+        cumulative statistics are left alone.
+
+        Returns True if the data sync succeeded, None otherwise.
+        """
+        pfx = gauxpfx()
+        clist = []
+        entries = []
+        datas = set()
+
+        # basic crawl stats: files and bytes
+        files_pending  = {'count': 0, 'purge': 0, 'bytes': 0, 'files': []}
+        try:
+            f = open(change, "r")
+            clist = f.readlines()
+            f.close()
+        except IOError:
+            raise
+
+        # build the dict describing one entry operation; a 'stat'
+        # keyword is reduced to its uid, gid and mode
+        def edct(op, **ed):
+            dct = {}
+            dct['op'] = op
+            for k in ed:
+                if k == 'stat':
+                    st = ed[k]
+                    dst = dct['stat'] = {}
+                    dst['uid'] = st.st_uid
+                    dst['gid'] = st.st_gid
+                    dst['mode'] = st.st_mode
+                else:
+                    dct[k] = ed[k]
+            return dct
+
+        # regular file update: bytes & count
+        def _update_reg(entry, size):
+            # a file seen more than once in this changelog counts once
+            if not entry in files_pending['files']:
+                files_pending['count'] += 1
+                files_pending['bytes'] += size
+                files_pending['files'].append(entry)
+        # updates for directories, symlinks etc..
+        def _update_rest():
+            files_pending['count'] += 1
+
+        # entry count
+        def entry_update(entry, size, mode):
+            if stat.S_ISREG(mode):
+                _update_reg(entry, size)
+            else:
+                _update_rest()
+        # purge count
+        def purge_update():
+            files_pending['purge'] += 1
+
+        for e in clist:
+            e = e.strip()
+            # the first two characters give the record type, the rest
+            # are space separated fields
+            et = e[self.IDX_START:self.IDX_END]
+            ec = e[self.IDX_END:].split(' ')
+            if et in self.TYPE_ENTRY:
+                ty = ec[self.POS_TYPE]
+                en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1]))
+                gfid = ec[self.POS_GFID]
+                # definitely need a better way bucketize entry ops
+                if ty in ['UNLINK', 'RMDIR']:
+                    purge_update()
+                    entries.append(edct(ty, gfid=gfid, entry=en))
+                    continue
+                go = os.path.join(pfx, gfid)
+                st = lstat(go)
+                if isinstance(st, int):
+                    # the gfid is gone by now; a rename of it is
+                    # replayed as an unlink of the old name
+                    if ty == 'RENAME':
+                        entries.append(edct('UNLINK', gfid=gfid, entry=en))
+                    else:
+                        logging.debug('file %s got purged in the interim' % go)
+                    continue
+                entry_update(go, st.st_size, st.st_mode)
+                if ty in ['CREATE', 'MKDIR', 'MKNOD']:
+                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
+                elif ty == 'LINK':
+                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid))
+                elif ty == 'SYMLINK':
+                    rl = errno_wrap(os.readlink, [en], [ENOENT])
+                    if isinstance(rl, int):
+                        continue
+                    entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
+                elif ty == 'RENAME':
+                    e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2]))
+                    entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st))
+                else:
+                    logging.warn('ignoring %s [op %s]' % (gfid, ty))
+            elif et in self.TYPE_GFID:
+                go = os.path.join(pfx, ec[0])
+                st = lstat(go)
+                if isinstance(st, int):
+                    logging.debug('file %s got purged in the interim' % go)
+                    continue
+                entry_update(go, st.st_size, st.st_mode)
+                datas.update([go])
+        logging.debug('entries: %s' % repr(entries))
+        if not retry:
+            self.update_cumulative_stats(files_pending)
+        # sync namespace
+        if (entries):
+            self.slave.server.entry_ops(entries)
+        # sync data
+        if self.syncdata(datas):
+            if done:
+                self.master.server.changelog_done(change)
+            return True
+
+    def sync_done(self):
+        """account what was pending as synced, then persist the statistics"""
+        stats = self.total_crawl_stats
+        stats['files_syncd'] += stats['files_remaining']
+        for key in ('files_remaining', 'bytes_remaining', 'purges_remaining'):
+            stats[key] = 0
+        self.update_crawl_data()
+
+    def process(self, changes, done=1):
+        """process each changelog of @changes, retrying on incomplete sync
+
+        A changelog is retried up to MAX_RETRIES times, then given up
+        on. @done is passed to .process_change and decides whether a
+        changelog we gave up on is reported as done to the master.
+        """
+        for change in changes:
+            tries = 0
+            retry = False
+            while True:
+                logging.debug('processing change %s' % change)
+                if self.process_change(change, done, retry):
+                    self.sync_done()
+                    break
+                retry = True
+                tries += 1
+                if tries == self.MAX_RETRIES:
+                    logging.warn('changelog %s could not be processed - moving on...' % os.path.basename(change))
+                    # the statistics are settled as if the sync had gone through
+                    self.sync_done()
+                    if done:
+                        self.master.server.changelog_done(change)
+                    break
+                # it's either entry_ops() or Rsync that failed to do it's
+                # job. Mostly it's entry_ops() [which currently has a problem
+                # of failing to create an entry but failing to return an errno]
+                # Therefore we do not know if it's either Rsync or the freaking
+                # entry_ops() that failed... so we retry the _whole_ changelog
+                # again.
+                # TODO: remove entry retries when it's gets fixed.
+                logging.warn('incomplete sync, retrying changelog: %s' % change)
+                time.sleep(0.5)
+            self.turns += 1
+
+    def upd_stime(self, stime):
+        """send @stime as the mark of the flat hierarchy root, unless it is URXTIME"""
+        if stime == URXTIME:
+            return
+        self.sendmark(self.FLAT_DIR_HIERARCHY, stime)
+
+    def crawl(self):
+        """scan for changelogs and process whatever the master hands out
+
+        Falls back to xsync mode if the scan fails with OSError.
+        Raises GsyncdError if the master root xtime cannot be fetched.
+        """
+        try:
+            self.master.server.changelog_scan()
+            self.crawls += 1
+        except OSError:
+            self.fallback_xsync()
+        changes = self.master.server.changelog_getchanges()
+        if not changes:
+            return
+        # fetch the xtime before processing; it becomes the mark sent
+        # once the changes went through
+        xtl = self.xtime(self.FLAT_DIR_HIERARCHY)
+        if isinstance(xtl, int):
+            raise GsyncdError('master is corrupt')
+        logging.debug('processing changes %s' % repr(changes))
+        self.process(changes)
+        self.upd_stime(xtl)
+
+    def register(self):
+        """register with the changelog library, falling back to xsync on failure"""
+        (workdir, logfile) = self.setup_working_dir()
+        # the crawl interval of this mode comes from the configuration
+        self.sleep_interval = int(gconf.change_interval)
+        # register with the changelog library
+        try:
+            # 9 == log level (DEBUG)
+            # 5 == connection retries
+            self.master.server.changelog_register(gconf.local_path,
+                                                  workdir, logfile, 9, 5)
+        except OSError:
+            self.fallback_xsync()
+            # control should not reach here
+            raise
+
+class GMasterXsyncMixin(GMasterChangelogMixin):
+ """
+
+ This crawl needs to be xtime based (as of now
+ it's not. this is because we generate CHANGELOG
+ file during each crawl which is then processed
+ by process_change()).
+ For now it's used as a one-shot initial sync
+ mechanism and only syncs directories, regular
+ files and symlinks.
+ """
+
+    def register(self):
+        """prepare the 'xsync' directory that receives the generated changelogs
+
+        Also sets the crawl interval to 60 seconds. An already
+        existing directory is fine, any other OSError is re-raised.
+        """
+        self.sleep_interval = 60
+        workdir = self.setup_working_dir()[0]
+        self.tempdir = workdir
+        self.tempdir = os.path.join(workdir, 'xsync')
+        logging.info('xsync temp directory: %s' % self.tempdir)
+        try:
+            os.makedirs(self.tempdir)
+        except OSError:
+            ex = sys.exc_info()[1]
+            already_there = ex.errno == EEXIST and os.path.isdir(self.tempdir)
+            if not already_there:
+                raise
+
+    def write_entry_change(self, prefix, data=[]):
+        """append the record "<prefix> <space separated data>" to the open changelog"""
+        fields = ' '.join(data)
+        record = "%s %s\n" % (prefix, fields)
+        self.fh.write(record)
+
+    def open(self):
+        """start a new changelog file in self.tempdir, named after the current time
+
+        IOError raised on opening the file propagates to the caller.
+        """
+        stamp = str(int(time.time()))
+        self.xsync_change = os.path.join(self.tempdir, 'XSYNC-CHANGELOG.' + stamp)
+        self.fh = open(self.xsync_change, 'w')
+
+    def close(self):
+        """close the changelog file opened by .open"""
+        changelog = self.fh
+        changelog.close()
+
+    def fname(self):
+        """path of the changelog file most recently started by .open"""
+        path = self.xsync_change
+        return path
+
+    def crawl(self, path='.', xtr=None, done=0):
+        """ generate a CHANGELOG file consumable by process_change
+
+        Walks the master recursively from @path and records every
+        entry .need_sync asks for. @xtr is the slave root stime all
+        comparisons are made against; it is fetched on the root
+        invocation. When the walk of the root ends, the generated
+        changelog is processed (with @done) and the stime updated.
+
+        Raises GsyncdError if an xtime cannot be fetched.
+        """
+        if path == '.':
+            self.open()
+            self.crawls += 1
+        if not xtr:
+            # get the root stime and use it for all comparisons
+            xtr = self.xtime('.', self.slave)
+            if isinstance(xtr, int):
+                if xtr != ENOENT:
+                    raise GsyncdError('slave is corrupt')
+                xtr = self.minus_infinity
+        xtl = self.xtime(path)
+        if isinstance(xtl, int):
+            raise GsyncdError('master is corrupt')
+        if xtr == xtl:
+            if path == '.':
+                self.close()
+            return
+        self.xtime_reversion_hook(path, xtl, xtr)
+        logging.debug("entering " + path)
+        dem = self.master.server.entries(path)
+        pargfid = self.master.server.gfid(path)
+        if isinstance(pargfid, int):
+            logging.warn('skipping directory %s' % (path))
+            # really skip it: without the parent gfid no entry below
+            # can be named, os.path.join(pargfid, ...) would blow up
+            dem = []
+        for e in dem:
+            bname = e
+            e = os.path.join(path, e)
+            st = lstat(e)
+            if isinstance(st, int):
+                logging.warn('%s got purged in the interim..' % e)
+                continue
+            gfid = self.master.server.gfid(e)
+            if isinstance(gfid, int):
+                logging.warn('skipping entry %s..' % (e))
+                continue
+            xte = self.xtime(e)
+            if isinstance(xte, int):
+                raise GsyncdError('master is corrupt')
+            if not self.need_sync(e, xte, xtr):
+                continue
+            mo = st.st_mode
+            if stat.S_ISDIR(mo):
+                self.write_entry_change("E", [gfid, 'MKDIR', escape(os.path.join(pargfid, bname))])
+                self.crawl(e, xtr)
+            elif stat.S_ISLNK(mo):
+                # read the link through its path @e (this used to refer
+                # to a name that does not exist in this method)
+                rl = errno_wrap(os.readlink, [e], [ENOENT])
+                if isinstance(rl, int):
+                    continue
+                self.write_entry_change("E", [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname)), rl])
+            else:
+                # if a file has a hardlink, create a Changelog entry as 'LINK' so the slave
+                # side will decide if to create the new entry, or to create link.
+                if st.st_nlink == 1:
+                    self.write_entry_change("E", [gfid, 'MKNOD', escape(os.path.join(pargfid, bname))])
+                else:
+                    self.write_entry_change("E", [gfid, 'LINK', escape(os.path.join(pargfid, bname))])
+                if stat.S_ISREG(mo):
+                    self.write_entry_change("D", [gfid])
+
+        if path == '.':
+            logging.info('processing xsync changelog %s' % self.fname())
+            self.close()
+            self.process([self.fname()], done)
+            self.upd_stime(xtl)
+
+class BoxClosedErr(Exception):
+    """raised by PostBox.append when the box has been closed"""
+
+class PostBox(list):
+    """synchronized collection for storing things thought of as "requests" """
+
+    def __init__(self, *a):
+        list.__init__(self, *a)
+        # too bad Python stdlib does not have read/write locks...
+        # it would suffice to grab the lock in .append as reader, in .close as writer
+        self.lever = Condition()
+        # becomes False on .close, further posts are refused then
+        self.open = True
+        # becomes True once .wakeup has published a result
+        self.done = False
+
+    def wait(self):
+        """wait on requests to be processed
+
+        Blocks until .wakeup has been called and returns the
+        result that was passed to it.
+        """
+        self.lever.acquire()
+        try:
+            # loop on the predicate: a wait on a condition may return
+            # without .wakeup having been called
+            while not self.done:
+                self.lever.wait()
+            return self.result
+        finally:
+            self.lever.release()
+
+    def wakeup(self, data):
+        """wake up requestors with the result"""
+        self.lever.acquire()
+        try:
+            # publish the result under the lock, together with the flag
+            self.result = data
+            self.done = True
+            self.lever.notifyAll()
+        finally:
+            self.lever.release()
+
+    def append(self, e):
+        """post a request
+
+        Raises BoxClosedErr if the box has been closed.
+        """
+        self.lever.acquire()
+        try:
+            # the lock has to be released on refusal, too, or everyone
+            # else using this box blocks forever
+            if not self.open:
+                raise BoxClosedErr
+            list.append(self, e)
+        finally:
+            self.lever.release()
+
+    def close(self):
+        """prohibit the posting of further requests"""
+        self.lever.acquire()
+        try:
+            self.open = False
+        finally:
+            self.lever.release()
+
+class Syncer(object):
+ """a staged queue to relay rsync requests to rsync workers
+
+ By "staged queue" its meant that when a consumer comes to the
+ queue, it takes _all_ entries, leaving the queue empty.
+ (I don't know if there is an official term for this pattern.)
+
+ The queue uses a PostBox to accumulate incoming items.
+ When a consumer (rsync worker) comes, a new PostBox is
+ set up and the old one is passed on to the consumer.
+
+ Instead of the simplistic scheme of having one big lock
+ which synchronizes both the addition of new items and
+ PostBox exchanges, use a separate lock to arbitrate consumers,
+ and rely on PostBox's synchronization mechanisms take
+ care about additions.
+
+ There is a corner case racy situation, producers vs. consumers,
+ which is not handled by this scheme: namely, when the PostBox
+ exchange occurs in between being passed to the producer for posting
+ and the post placement. But that's what Postbox.close is for:
+ such a posting will find the PostBox closed, in which case
+ the producer can re-try posting against the actual PostBox of
+ the queue.
+
+ To aid accumulation of items in the PostBoxen before grabbed
+ by an rsync worker, the worker goes to sleep a bit after
+ each completed syncjob.
+ """
+
+    def __init__(self, slave):
+        """spawn worker threads
+
+        As many threads running .syncjob are started as
+        gconf.sync_jobs tells.
+        """
+        self.slave = slave
+        # arbitrates the workers when they exchange the PostBox
+        self.lock = Lock()
+        self.pb = PostBox()
+        self.bytes_synced = 0
+        nworkers = int(gconf.sync_jobs)
+        for _ in range(nworkers):
+            Thread(target=self.syncjob).start()
+
+    def syncjob(self):
+        """the life of a worker
+
+        Forever: take over the current PostBox once it holds
+        something, run rsync on its content and wake up the
+        requestors with a (success, returncode) pair.
+        """
+        while True:
+            pb = None
+            while True:
+                self.lock.acquire()
+                # a non-empty box is taken and replaced by a fresh one
+                if self.pb:
+                    pb, self.pb = self.pb, PostBox()
+                self.lock.release()
+                if pb:
+                    break
+                time.sleep(0.5)
+            # late posters get BoxClosedErr and retry on the new box
+            pb.close()
+            po = self.slave.rsync(pb)
+            if po.returncode == 0:
+                ret = (True, 0)
+            elif po.returncode in (23, 24):
+                # partial transfer (cf. rsync(1)), that's normal
+                ret = (False, po.returncode)
+            else:
+                # NOTE(review): ret is not assigned on this path; this
+                # relies on errfail() not returning -- confirm
+                po.errfail()
+            pb.wakeup(ret)
+
+    def add(self, e):
+        """post @e to the current PostBox and return that box
+
+        If the box got closed in the meantime, the post is
+        retried against whatever box is current then.
+        """
+        while True:
+            box = self.pb
+            try:
+                box.append(e)
+            except BoxClosedErr:
+                continue
+            return box
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
new file mode 100644
index 000000000..0c3a42fa6
--- /dev/null
+++ b/geo-replication/syncdaemon/monitor.py
@@ -0,0 +1,244 @@
+import os
+import sys
+import time
+import signal
+import logging
+import uuid
+import xml.etree.ElementTree as XET
+from subprocess import PIPE
+from resource import Popen, FILE, GLUSTER, SSH
+from threading import Lock
+from gconf import gconf
+from syncdutils import update_file, select, waitpid, set_term_handler, is_host_local, GsyncdError
+from syncdutils import escape, Thread, finalize, memoize
+
+class Volinfo(object):
+    """volume info as reported by "gluster --xml volume info" """
+
+    def __init__(self, vol, host='localhost', prelude=[]):
+        """query @host for the info of volume @vol
+
+        @prelude is a list of words put in front of the gluster
+        command line (not modified here).
+
+        Raises GsyncdError if the reply carries a non-zero opRet.
+        """
+        po = Popen(prelude + ['gluster', '--xml', '--remote-host=' + host, 'volume', 'info', vol],
+                   stdout=PIPE, stderr=PIPE)
+        vix = po.stdout.read()
+        po.wait()
+        po.terminate_geterr()
+        vi = XET.fromstring(vix)
+        if vi.find('opRet').text != '0':
+            if prelude:
+                # join is a method of the separator, lists have none
+                via = '(via %s) ' % ' '.join(prelude)
+            else:
+                via = ' '
+            # interpolate here: the exception does not format its arguments
+            raise GsyncdError('getting volume info of %s%s failed with errorcode %s' %
+                              (vol, via, vi.find('opErrno').text))
+        self.tree = vi
+        self.volume = vol
+        self.host = host
+
+    def get(self, elem):
+        """all elements named @elem, at any depth of the reply"""
+        return self.tree.findall('.//' + elem)
+
+    @property
+    @memoize
+    def bricks(self):
+        """list of {'host': ..., 'dir': ...} dicts, one per brick element"""
+        def bparse(b):
+            # split at the first colon only, so that a colon in the
+            # directory part cannot yield a third field
+            host, dirp = b.text.split(':', 1)
+            return {'host': host, 'dir': dirp}
+        return [ bparse(b) for b in self.get('brick') ]
+
+    @property
+    @memoize
+    def uuid(self):
+        """text of the single id element
+
+        Raises GsyncdError unless there is exactly one.
+        """
+        ids = self.get('id')
+        if len(ids) != 1:
+            # interpolate here: the exception does not format its arguments
+            raise GsyncdError("volume info of %s obtained from %s: ambiguous uuid" %
+                              (self.volume, self.host))
+        return ids[0].text
+
+
+class Monitor(object):
+ """class which spawns and manages gsyncd workers"""
+
+ ST_INIT = 'Initializing...'
+ ST_STABLE = 'Stable'
+ ST_FAULTY = 'faulty'
+ ST_INCON = 'inconsistent'
+ _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON]
+
+    def __init__(self):
+        """start out with no worker states recorded"""
+        # worker -> state, see .set_state
+        self.state = {}
+        # guards self.state and the set of worker pids
+        self.lock = Lock()
+
+    def set_state(self, state, w=None):
+        """set the state that can be used by external agents
+        like glusterd for status reporting
+
+        With a worker @w given, @state is recorded for that worker and
+        the overall state (the one ranking last in _ST_ORD among the
+        recorded ones) is published if it has changed. Without @w,
+        @state is logged and, if gconf.state_file is set, written there.
+        """
+        def computestate():
+            if not self.state:
+                return self.state
+            worst = max([self._ST_ORD.index(s) for s in self.state.values()])
+            return self._ST_ORD[worst]
+        if not w:
+            logging.info('new state: %s' % state)
+            if getattr(gconf, 'state_file', None):
+                update_file(gconf.state_file, lambda f: f.write(state + '\n'))
+            return
+        self.lock.acquire()
+        old_state = computestate()
+        self.state[w] = state
+        state = computestate()
+        self.lock.release()
+        if state != old_state:
+            self.set_state(state)
+
+    @staticmethod
+    def terminate():
+        """send SIGTERM to the process group, letting one SIGTERM pass for ourselves"""
+        # relax one SIGTERM by setting a handler that sets back
+        # standard handler
+        set_term_handler(lambda *a: set_term_handler())
+        # give a chance to graceful exit
+        # (a negative pid addresses a process group; NOTE(review): this
+        # assumes we are the leader of our group -- confirm)
+        os.kill(-os.getpid(), signal.SIGTERM)
+
+    def monitor(self, w, argv, cpids):
+        """the monitor loop
+
+        Basic logic is a blatantly simple blunt heuristics:
+        if spawned client survives 60 secs, it's considered OK.
+        This serves us pretty well as it's not vulnerable to
+        any kind of irregular behavior of the child...
+
+        ... well, except for one: if children is hung up on
+        waiting for some event, it can survive aeons, still
+        will be defunct. So we tweak the above logic to
+        expect the worker to send us a signal within 60 secs
+        (in the form of closing its end of a pipe). The worker
+        does this when it's done with the setup stage
+        ready to enter the service loop (note it's the setup
+        stage which is vulnerable to hangs -- the full
+        blown worker blows up on EPIPE if the net goes down,
+        due to the keep-alive thread)
+
+        @w is the (local path, remote resource) pair of the worker,
+        @argv the command line to spawn it with, @cpids the shared
+        set of worker pids. The worker is respawned as long as it
+        ends with status 0 or 1; any other status is returned.
+        """
+
+        self.set_state(self.ST_INIT, w)
+        ret = 0
+        # wait for pid @p with options @o: the raw status, or None if
+        # waitpid reports no pid
+        def nwait(p, o=0):
+            p2, r = waitpid(p, o)
+            if not p2:
+                return
+            return r
+        def exit_signalled(s):
+            """ child terminated due to receipt of SIGUSR1 """
+            return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))
+        # exit code of a regular exit, 1 for any other kind of ending
+        def exit_status(s):
+            if os.WIFEXITED(s):
+                return os.WEXITSTATUS(s)
+            return 1
+        conn_timeout = int(gconf.connection_timeout)
+        while ret in (0, 1):
+            logging.info('-' * conn_timeout)
+            logging.info('starting gsyncd worker')
+            pr, pw = os.pipe()
+            cpid = os.fork()
+            if cpid == 0:
+                # child: keep the write end only and become the worker
+                os.close(pr)
+                os.execv(sys.executable, argv + ['--feedback-fd', str(pw),
+                                                 '--local-path', w[0],
+                                                 '--local-id', '.' + escape(w[0]),
+                                                 '--resource-remote', w[1]])
+            self.lock.acquire()
+            cpids.add(cpid)
+            self.lock.release()
+            os.close(pw)
+            t0 = time.time()
+            # the read end becomes readable when the worker closes its
+            # end of the pipe (or when the worker is gone)
+            so = select((pr,), (), (), conn_timeout)[0]
+            os.close(pr)
+            if so:
+                ret = nwait(cpid, os.WNOHANG)
+                if ret != None:
+                    logging.debug("worker died before establishing connection")
+                else:
+                    logging.debug("worker seems to be connected (?? racy check)")
+                    # keep an eye on it for the rest of the timeout
+                    while time.time() < t0 + conn_timeout:
+                        ret = nwait(cpid, os.WNOHANG)
+                        if ret != None:
+                            logging.debug("worker died in startup phase")
+                            break
+                        time.sleep(1)
+            else:
+                logging.debug("worker not confirmed in %d sec, aborting it" % \
+                              conn_timeout)
+                self.terminate()
+                time.sleep(1)
+                os.kill(cpid, signal.SIGKILL)
+                ret = nwait(cpid)
+            if ret == None:
+                # survived the startup phase: wait for it to end
+                self.set_state(self.ST_STABLE, w)
+                ret = nwait(cpid)
+            if exit_signalled(ret):
+                ret = 0
+            else:
+                ret = exit_status(ret)
+                if ret in (0,1):
+                    self.set_state(self.ST_FAULTY, w)
+            time.sleep(10)
+        self.set_state(self.ST_INCON, w)
+        return ret
+
+    def multiplex(self, wspx, suuid):
+        """run a monitor thread for each worker spec of @wspx and wait for them
+
+        The worker command line is our own one without the
+        -N/--no-daemon/--monitor options, extended with
+        '-N -p "" --slave-id @suuid'. When any of the monitor loops
+        ends, everything is terminated and we finalize with
+        exit value 1.
+        """
+        argv = sys.argv[:]
+        for o in ('-N', '--no-daemon', '--monitor'):
+            while o in argv:
+                argv.remove(o)
+        argv.extend(('-N', '-p', '', '--slave-id', suuid))
+        argv.insert(0, os.path.basename(sys.executable))
+
+        cpids = set()
+        ta = []
+        for wx in wspx:
+            def wmon(w):
+                # .monitor returns a plain exit status, there is
+                # nothing to unpack from it
+                self.monitor(w, argv, cpids)
+                # terminate is a static method of ours, not a global
+                self.terminate()
+                time.sleep(1)
+                self.lock.acquire()
+                try:
+                    for cpid in cpids:
+                        os.kill(cpid, signal.SIGKILL)
+                finally:
+                    # release even if a kill fails
+                    self.lock.release()
+                finalize(exval=1)
+            t = Thread(target = wmon, args=[wx])
+            t.start()
+            ta.append(t)
+        for t in ta:
+            t.join()
+
+def distribute(*resources):
+    """work out which workers to run on this node
+
+    @resources is the (master, slave) pair. Returns (workerspex, suuid):
+    workerspex is a list of (local brick directory, slave url) pairs,
+    one for each master brick hosted locally, the slaves being dealt
+    out round robin; suuid is the id of the slave as a string.
+
+    Raises GsyncdError for a slave that is neither file nor gluster based.
+    """
+    master, slave = resources
+    mvol = Volinfo(master.volume, master.host)
+    logging.debug('master bricks: ' + repr(mvol.bricks))
+    prelude = []
+    si = slave
+    if isinstance(slave, SSH):
+        # the slave volume has to be queried through the ssh gateway
+        prelude = gconf.ssh_command.split() + [slave.remote_addr]
+        si = slave.inner_rsc
+        logging.debug('slave SSH gateway: ' + slave.remote_addr)
+    if isinstance(si, FILE):
+        sbricks = {'host': 'localhost', 'dir': si.path}
+        # str: the id ends up in a command line, which takes strings only
+        suuid = str(uuid.uuid5(uuid.NAMESPACE_URL, slave.get_url(canonical=True)))
+    elif isinstance(si, GLUSTER):
+        svol = Volinfo(si.volume, si.host, prelude)
+        sbricks = svol.bricks
+        suuid = svol.uuid
+    else:
+        raise GsyncdError("unkown slave type " + slave.url)
+    logging.info('slave bricks: ' + repr(sbricks))
+    if isinstance(si, FILE):
+        slaves = [ slave.url ]
+    else:
+        slavenodes = set(b['host'] for b in sbricks)
+        if isinstance(slave, SSH) and not gconf.isolated_slave:
+            rap = SSH.parse_ssh_address(slave)
+            slaves = [ 'ssh://' + rap['user'] + '@' + h + ':' + si.url for h in slavenodes ]
+        else:
+            slavevols = [ h + ':' + si.volume for h in slavenodes ]
+            if isinstance(slave, SSH):
+                # take the gateway address from the slave itself: rap is
+                # assigned in the other branch only
+                slaves = [ 'ssh://' + slave.remote_addr + ':' + v for v in slavevols ]
+            else:
+                slaves = slavevols
+
+    workerspex = [ (brick['dir'], slaves[idx % len(slaves)]) for idx, brick in enumerate(mvol.bricks) if is_host_local(brick['host']) ]
+    logging.info('worker specs: ' + repr(workerspex))
+    return workerspex, suuid
+
+def monitor(*resources):
+    """oh yeah, actually Monitor is used as singleton, too"""
+    mon = Monitor()
+    workerspex, suuid = distribute(*resources)
+    return mon.multiplex(workerspex, suuid)
diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
index 755fb61df..755fb61df 100644
--- a/xlators/features/marker/utils/syncdaemon/repce.py
+++ b/geo-replication/syncdaemon/repce.py
diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
index c4cd19c9f..faf62f868 100644
--- a/xlators/features/marker/utils/syncdaemon/resource.py
+++ b/geo-replication/syncdaemon/resource.py
@@ -5,21 +5,23 @@ import stat
import time
import fcntl
import errno
+import types
import struct
import socket
import logging
import tempfile
import threading
import subprocess
-from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR
-from select import error as selecterror
+from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR, ENOTEMPTY, ESTALE, EINVAL
+from select import error as SelectError
from gconf import gconf
import repce
from repce import RepceServer, RepceClient
-from master import GMaster
+from master import gmaster_builder
import syncdutils
-from syncdutils import GsyncdError, select, privileged
+from syncdutils import GsyncdError, select, privileged, boolify, funcode
+from syncdutils import umask, entry2pb, gauxpfx, errno_wrap, lstat
UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z')
HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I)
@@ -105,7 +107,18 @@ class _MetaXattr(object):
setattr(self, m, getattr(LXattr, m))
return getattr(self, meth)
+class _MetaChangelog(object):
+    """proxy that imports libgfchangelog on first use and exposes
+    the public names of its Changes class as own attributes"""
+    def __getattr__(self, meth):
+        # the import is deferred until an attribute is looked up
+        from libgfchangelog import Changes as LChanges
+        xmeth = [ m for m in dir(LChanges) if m[0] != '_' ]
+        if not meth in xmeth:
+            # unknown or underscore-prefixed name: gives None
+            # instead of raising AttributeError
+            return
+        for m in xmeth:
+            # store all public names on the proxy at once
+            setattr(self, m, getattr(LChanges, m))
+        return getattr(self, meth)
+
Xattr = _MetaXattr()
+Changes = _MetaChangelog()
class Popen(subprocess.Popen):
@@ -121,16 +134,16 @@ class Popen(subprocess.Popen):
errstore = cls.errstore.copy()
try:
poe, _ ,_ = select([po.stderr for po in errstore], [], [], 1)
- except ValueError, selecterror:
+ except (ValueError, SelectError):
continue
for po in errstore:
if po.stderr not in poe:
- next
+ continue
po.lock.acquire()
try:
- la = errstore.get(po)
- if la == None:
+ if po.on_death_row:
continue
+ la = errstore[po]
try:
fd = po.stderr.fileno()
except ValueError: # file is already closed
@@ -169,6 +182,7 @@ class Popen(subprocess.Popen):
if 'close_fds' not in kw:
kw['close_fds'] = True
self.lock = threading.Lock()
+ self.on_death_row = False
try:
sup(self, args, *a, **kw)
except:
@@ -177,7 +191,7 @@ class Popen(subprocess.Popen):
raise
raise GsyncdError("""execution of "%s" failed with %s (%s)""" % \
(args[0], errno.errorcode[ex.errno], os.strerror(ex.errno)))
- if kw['stderr'] == subprocess.PIPE:
+ if kw.get('stderr') == subprocess.PIPE:
assert(getattr(self, 'errhandler', None))
self.errstore[self] = []
@@ -188,9 +202,17 @@ class Popen(subprocess.Popen):
filling = ", saying:"
logging.error("""command "%s" returned with %s%s""" % \
(" ".join(self.args), repr(self.returncode), filling))
+ lp = ''
+ def logerr(l):
+ logging.error(self.args[0] + "> " + l)
for l in self.elines:
- for ll in l.rstrip().split("\n"):
- logging.error(self.args[0] + "> " + ll.rstrip())
+ ls = l.split('\n')
+ ls[0] = lp + ls[0]
+ lp = ls.pop()
+ for ll in ls:
+ logerr(ll)
+ if lp:
+ logerr(lp)
def errfail(self):
"""fail nicely if child did not terminate with success"""
@@ -204,16 +226,19 @@ class Popen(subprocess.Popen):
"""
self.lock.acquire()
try:
- elines = self.errstore.pop(self)
+ self.on_death_row = True
finally:
self.lock.release()
+ elines = self.errstore.pop(self)
if self.poll() == None:
self.terminate()
if self.poll() == None:
time.sleep(0.1)
- self.kill()
- self.wait()
+ self.kill()
+ self.wait()
while True:
+ if not select([self.stderr],[],[],0.1)[0]:
+ break
b = os.read(self.stderr.fileno(), 1024)
if b:
elines.append(b)
@@ -233,10 +258,24 @@ class Server(object):
and classmethods and is used directly, without instantiation.)
"""
- GX_NSPACE = (privileged() and "trusted" or "system") + ".glusterfs"
+ GX_NSPACE_PFX = (privileged() and "trusted" or "system")
+ GX_NSPACE = GX_NSPACE_PFX + ".glusterfs"
NTV_FMTSTR = "!" + "B"*19 + "II"
FRGN_XTRA_FMT = "I"
FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
+ GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+
+ local_path = ''
+
+ @classmethod
+ def _fmt_mknod(cls, l):
+ return "!II%dsI%dsIII" % (cls.GX_GFID_CANONICAL_LEN, l+1)
+ @classmethod
+ def _fmt_mkdir(cls, l):
+ return "!II%dsI%dsII" % (cls.GX_GFID_CANONICAL_LEN, l+1)
+ @classmethod
+ def _fmt_symlink(cls, l1, l2):
+ return "!II%dsI%ds%ds" % (cls.GX_GFID_CANONICAL_LEN, l1+1, l2+1)
def _pathguard(f):
"""decorator method that checks
@@ -245,22 +284,21 @@ class Server(object):
point out of the managed tree
"""
- fc = getattr(f, 'func_code', None)
- if not fc:
- # python 3
- fc = f.__code__
+ fc = funcode(f)
pi = list(fc.co_varnames).index('path')
def ff(*a):
path = a[pi]
ps = path.split('/')
if path[0] == '/' or '..' in ps:
raise ValueError('unsafe path')
+ a = list(a)
+ a[pi] = os.path.join(a[0].local_path, path)
return f(*a)
return ff
- @staticmethod
+ @classmethod
@_pathguard
- def entries(path):
+ def entries(cls, path):
"""directory entries in an array"""
# prevent symlinks being followed
if not stat.S_ISDIR(os.lstat(path).st_mode):
@@ -359,14 +397,162 @@ class Server(object):
raise
@classmethod
+ def gfid(cls, gfidpath):
+ return errno_wrap(Xattr.lgetxattr, [gfidpath, 'glusterfs.gfid.string', cls.GX_GFID_CANONICAL_LEN], [ENOENT])
+
+ @classmethod
+ def node_uuid(cls, path='.'):
+ try:
+ uuid_l = Xattr.lgetxattr_buf(path, '.'.join([cls.GX_NSPACE, 'node-uuid']))
+ return uuid_l[:-1].split(' ')
+ except OSError:
+ raise
+
+ @classmethod
+ def xtime_vec(cls, path, *uuids):
+ """vectored version of @xtime
+
+ accepts a list of uuids and returns a dictionary
+ with uuid as key(s) and xtime as value(s)
+ """
+ xt = {}
+ for uuid in uuids:
+ xtu = cls.xtime(path, uuid)
+ if xtu == ENODATA:
+ xtu = None
+ if isinstance(xtu, int):
+ return xtu
+ xt[uuid] = xtu
+ return xt
+
+ @classmethod
@_pathguard
def set_xtime(cls, path, uuid, mark):
"""set @mark as xtime for @uuid on @path"""
Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
- @staticmethod
+ @classmethod
+ @_pathguard
+ def set_xtime_remote(cls, path, uuid, mark):
+ """
+ set @mark as xtime for @uuid on @path
+ the difference b/w this and set_xtime() being
+ set_xtime() being overloaded to set the xtime
+ on the brick (this method sets xtime on the
+ remote slave)
+ """
+ Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
+
+ @classmethod
+ def set_xtime_vec(cls, path, mark_dct):
+ """vectored (or dictered) version of set_xtime
+
+ ignore values that match @ignore
+ """
+ for u,t in mark_dct.items():
+ cls.set_xtime(path, u, t)
+
+ @classmethod
+ def entry_ops(cls, entries):
+ pfx = gauxpfx()
+ logging.debug('entries: %s' % repr(entries))
+ # regular file
+ def entry_pack_reg(gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+ def entry_pack_mkdir(gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mkdir(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ #symlink
+ def entry_pack_symlink(gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
+ def entry_purge(entry, gfid):
+# This code is extremely racy and needs to be fixed ASAP.
+ # The GFID check here is to be sure that the pargfid/bname
+ # to be purged is the GFID gotten from the changelog.
+ # (a stat(changelog_gfid) would also be valid here)
+ # The race here is between the GFID check and the purge.
+ disk_gfid = cls.gfid(entry)
+ if isinstance(disk_gfid, int):
+ return
+ if not gfid == disk_gfid:
+ return
+ er = errno_wrap(os.unlink, [entry], [ENOENT, EISDIR])
+ if isinstance(er, int):
+ if er == EISDIR:
+ er = errno_wrap(os.rmdir, [entry], [ENOENT, ENOTEMPTY])
+ if er == ENOTEMPTY:
+ return er
+ for e in entries:
+ blob = None
+ op = e['op']
+ gfid = e['gfid']
+ entry = e['entry']
+ (pg, bname) = entry2pb(entry)
+ if op in ['RMDIR', 'UNLINK']:
+ while True:
+ er = entry_purge(entry, gfid)
+ if isinstance(er, int):
+ time.sleep(1)
+ else:
+ break
+ elif op in ['CREATE', 'MKNOD']:
+ blob = entry_pack_reg(gfid, bname, e['stat'])
+ elif op == 'MKDIR':
+ blob = entry_pack_mkdir(gfid, bname, e['stat'])
+ elif op == 'LINK':
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(entry)
+ blob = entry_pack_reg(gfid, bname, e['stat'])
+ else:
+ errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST])
+ elif op == 'SYMLINK':
+ blob = entry_pack_symlink(gfid, bname, e['link'], e['stat'])
+ elif op == 'RENAME':
+ en = e['entry1']
+ st = lstat(entry)
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_reg(gfid, bname, e['stat'])
+ else:
+ errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST])
+ if blob:
+ errno_wrap(Xattr.lsetxattr_l, [pg, 'glusterfs.gfid.newfile', blob], [EEXIST], [ENOENT, ESTALE, EINVAL])
+
+ @classmethod
+ def changelog_register(cls, cl_brick, cl_dir, cl_log, cl_level, retries = 0):
+ Changes.cl_register(cl_brick, cl_dir, cl_log, cl_level, retries)
+
+ @classmethod
+ def changelog_scan(cls):
+ Changes.cl_scan()
+
+ @classmethod
+ def changelog_getchanges(cls):
+ return Changes.cl_getchanges()
+
+ @classmethod
+ def changelog_done(cls, clfile):
+ Changes.cl_done(clfile)
+
+ @classmethod
@_pathguard
- def setattr(path, adct):
+ def setattr(cls, path, adct):
"""set file attributes
@adct is a dict, where 'own', 'mode' and 'times'
@@ -434,6 +620,10 @@ class SlaveLocal(object):
stop servicing if a timeout is configured and got no
keep-alime in that inteval
"""
+
+ if boolify(gconf.use_rsync_xattrs) and not privileged():
+ raise GsyncdError("using rsync for extended attributes is not supported")
+
repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs))
t = syncdutils.Thread(target=lambda: (repce.service_loop(),
syncdutils.finalize()))
@@ -460,12 +650,13 @@ class SlaveRemote(object):
communicate throuh its stdio.
"""
slave = opts.get('slave', self.url)
+ extra_opts = []
so = getattr(gconf, 'session_owner', None)
if so:
- so_args = ['--session-owner', so]
- else:
- so_args = []
- po = Popen(rargs + gconf.remote_gsyncd.split() + so_args + \
+ extra_opts += ['--session-owner', so]
+ if boolify(gconf.use_rsync_xattrs):
+ extra_opts.append('--use-rsync-xattrs')
+ po = Popen(rargs + gconf.remote_gsyncd.split() + extra_opts + \
['-N', '--listen', '--timeout', str(gconf.timeout), slave],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
gconf.transport = po
@@ -493,10 +684,19 @@ class SlaveRemote(object):
if not files:
raise GsyncdError("no files to sync")
logging.debug("files: " + ", ".join(files))
- argv = gconf.rsync_command.split() + ['-aRS', '--super', '--numeric-ids'] + files + list(args)
- po = Popen(argv, stderr=subprocess.PIPE)
+ argv = gconf.rsync_command.split() + \
+ ['-avR0', '--inplace', '--files-from=-', '--super','--stats', '--numeric-ids', '--no-implied-dirs'] + \
+ gconf.rsync_options.split() + (boolify(gconf.use_rsync_xattrs) and ['--xattrs'] or []) + \
+ ['.'] + list(args)
+ po = Popen(argv, stdin=subprocess.PIPE,stderr=subprocess.PIPE)
+ for f in files:
+ po.stdin.write(f)
+ po.stdin.write('\0')
+
+ po.stdin.close()
po.wait()
po.terminate_geterr(fail_on_err = False)
+
return po
@@ -604,6 +804,7 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
if x[0] > now:
logging.debug("volinfo[%s] expires: %d (%d sec later)" % \
(d['uuid'], x[0], x[0] - now))
+ d['timeout'] = x[0]
dict_list.append(d)
else:
try:
@@ -632,7 +833,8 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
def can_connect_to(self, remote):
"""determine our position in the connectibility matrix"""
- return True
+ return not remote or \
+ (isinstance(remote, SSH) and isinstance(remote.inner_rsc, GLUSTER))
class Mounter(object):
"""Abstract base class for mounter backends"""
@@ -811,6 +1013,10 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
sup(self, *a, **kw)
self.slavedir = "/proc/%d/cwd" % self.server.pid()
+ def gmaster_instantiate_tuple(self, slave):
+ """return a tuple of the 'one shot' and the 'main crawl' class instance"""
+ return (gmaster_builder('xsync')(self, slave), gmaster_builder()(self, slave))
+
def service_loop(self, *args):
"""enter service loop
@@ -820,7 +1026,41 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
- else do that's what's inherited
"""
if args:
- GMaster(self, args[0]).crawl_loop()
+ slave = args[0]
+ if gconf.local_path:
+ class brickserver(FILE.FILEServer):
+ local_path = gconf.local_path
+ aggregated = self.server
+ @classmethod
+ def entries(cls, path):
+ e = super(brickserver, cls).entries(path)
+ # on the brick don't mess with /.glusterfs
+ if path == '.':
+ try:
+ e.remove('.glusterfs')
+ except ValueError:
+ pass
+ return e
+ if gconf.slave_id:
+ # define {,set_}xtime in slave, thus preempting
+ # the call to remote, so that it takes data from
+ # the local brick
+ slave.server.xtime = types.MethodType(lambda _self, path, uuid: brickserver.xtime(path, uuid + '.' + gconf.slave_id), slave.server)
+ slave.server.set_xtime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_xtime(path, uuid + '.' + gconf.slave_id, mark), slave.server)
+ (g1, g2) = self.gmaster_instantiate_tuple(slave)
+ g1.master.server = brickserver
+ g2.master.server = brickserver
+ else:
+ (g1, g2) = self.gmaster_instantiate_tuple(slave)
+ g1.master.server.aggregated = gmaster.master.server
+ g2.master.server.aggregated = gmaster.master.server
+ # bad bad bad: bad way to do things like this
+ # need to make this elegant
+ # register the crawlers and start crawling
+ g1.register()
+ g2.register()
+ g1.crawlwrap(oneshot=True)
+ g2.crawlwrap()
else:
sup(self, *args)
@@ -839,14 +1079,21 @@ class SSH(AbstractUrl, SlaveRemote):
self.remote_addr, inner_url = sup(self, path,
'^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ]))
self.inner_rsc = parse_url(inner_url)
+ self.volume = inner_url[1:]
- def canonical_path(self):
+ @staticmethod
+ def parse_ssh_address(self):
m = re.match('([^@]+)@(.+)', self.remote_addr)
if m:
u, h = m.groups()
else:
u, h = syncdutils.getusername(), self.remote_addr
- remote_addr = '@'.join([u, gethostbyname(h)])
+ self.remotehost = h
+ return {'user': u, 'host': h}
+
+ def canonical_path(self):
+ rap = self.parse_ssh_address(self)
+ remote_addr = '@'.join([rap['user'], gethostbyname(rap['host'])])
return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)])
def can_connect_to(self, remote):
@@ -892,9 +1139,15 @@ class SSH(AbstractUrl, SlaveRemote):
"""
if go_daemon == 'done':
return self.start_fd_client(*self.fd_pair)
- gconf.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'))
+
+ syncdutils.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'),
+ self.remote_addr,
+ self.inner_rsc.url)
+
deferred = go_daemon == 'postconn'
- ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred)
+ ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr],
+ slave=self.inner_rsc.url, deferred=deferred)
+
if deferred:
# send a message to peer so that we can wait for
# the answer from which we know connection is
@@ -915,4 +1168,5 @@ class SSH(AbstractUrl, SlaveRemote):
return 'should'
def rsync(self, files):
- return sup(self, files, '-ze', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), self.slaveurl)
+ return sup(self, files, '-e', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args),
+ *(gconf.rsync_ssh_options.split() + [self.slaveurl]))
diff --git a/xlators/features/marker/utils/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
index f786bc343..348eb38c1 100644
--- a/xlators/features/marker/utils/syncdaemon/syncdutils.py
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -5,12 +5,14 @@ import time
import fcntl
import shutil
import logging
+import socket
from threading import Lock, Thread as baseThread
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED, EINTR, errorcode
+from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED, EINTR, ENOENT, EPERM, ESTALE, errorcode
from signal import signal, SIGTERM, SIGKILL
from time import sleep
import select as oselect
from os import waitpid as owaitpid
+
try:
from cPickle import PickleError
except ImportError:
@@ -25,6 +27,16 @@ try:
except ImportError:
import urllib
+try:
+ from hashlib import md5 as md5
+except ImportError:
+ # py 2.4
+ from md5 import new as md5
+
+# auxiliary gfid based access prefix
+_CL_AUX_GFID_PFX = ".gfid/"
+GF_OP_RETRIES = 20
+
def escape(s):
"""the chosen flavor of string escaping, used all over
to turn whatever data to creatable representation"""
@@ -68,6 +80,38 @@ def update_file(path, updater, merger = lambda f: True):
if fx:
fx.close()
+def create_manifest(fname, content):
+ """
+ Create manifest file for SSH Control Path
+ """
+ fd = None
+ try:
+ fd = os.open(fname, os.O_CREAT|os.O_RDWR)
+ try:
+ os.write(fd, content)
+ except:
+ os.close(fd)
+ raise
+ finally:
+ if fd != None:
+ os.close(fd)
+
+def setup_ssh_ctl(ctld, remote_addr, resource_url):
+ """
+ Setup GConf ssh control path parameters
+ """
+ gconf.ssh_ctl_dir = ctld
+ content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr,
+ resource_url)
+ content_md5 = md5hex(content)
+ fname = os.path.join(gconf.ssh_ctl_dir,
+ "%s.mft" % content_md5)
+
+ create_manifest(fname, content)
+ ssh_ctl_path = os.path.join(gconf.ssh_ctl_dir,
+ "%s.sock" % content_md5)
+ gconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path]
+
def grabfile(fname, content=None):
"""open @fname + contest for its fcntl lock
@@ -138,6 +182,12 @@ def finalize(*a, **kw):
raise
if gconf.ssh_ctl_dir and not gconf.cpid:
shutil.rmtree(gconf.ssh_ctl_dir)
+ if getattr(gconf, 'state_socket', None):
+ try:
+ os.unlink(gconf.state_socket)
+ except:
+ if sys.exc_info()[0] == OSError:
+ pass
if gconf.log_exit:
logging.info("exiting.")
sys.stdout.flush()
@@ -165,14 +215,20 @@ def log_raise_exception(excont):
logtag = None
if isinstance(exc, GsyncdError):
if is_filelog:
- logging.error(exc.message)
- sys.stderr.write('failure: ' + exc.message + "\n")
+ logging.error(exc.args[0])
+ sys.stderr.write('failure: ' + exc.args[0] + '\n')
elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \
((isinstance(exc, OSError) or isinstance(exc, IOError)) and \
exc.errno == EPIPE):
logging.error('connection to peer is broken')
if hasattr(gconf, 'transport'):
gconf.transport.wait()
+ if gconf.transport.returncode == 127:
+ logging.warn("!!!!!!!!!!!!!")
+ logging.warn('!!! getting "No such file or directory" errors '
+ "is most likely due to MISCONFIGURATION, please consult "
+ "http://access.redhat.com/knowledge/docs/en-US/Red_Hat_Storage/2.0/html/Administration_Guide/chap-User_Guide-Geo_Rep-Preparation-Settingup_Environment.html")
+ logging.warn("!!!!!!!!!!!!!")
gconf.transport.terminate_geterr()
elif isinstance(exc, OSError) and exc.errno in (ENOTCONN, ECONNABORTED):
logging.error('glusterfs session went down [%s]', errorcode[exc.errno])
@@ -274,3 +330,109 @@ def waitpid (*a):
def set_term_handler(hook=lambda *a: finalize(*a, **{'exval': 1})):
signal(SIGTERM, hook)
+
+def is_host_local(host):
+ locaddr = False
+ for ai in socket.getaddrinfo(host, None):
+ # cf. http://github.com/gluster/glusterfs/blob/ce111f47/xlators/mgmt/glusterd/src/glusterd-utils.c#L125
+ if ai[0] == socket.AF_INET:
+ if ai[-1][0].split(".")[0] == "127":
+ locaddr = True
+ break
+ elif ai[0] == socket.AF_INET6:
+ if ai[-1][0] == "::1":
+ locaddr = True
+ break
+ else:
+ continue
+ try:
+ # use ICMP socket to avoid net.ipv4.ip_nonlocal_bind issue,
+ # cf. https://bugzilla.redhat.com/show_bug.cgi?id=890587
+ s = socket.socket(ai[0], socket.SOCK_RAW, socket.IPPROTO_ICMP)
+ except socket.error:
+ ex = sys.exc_info()[1]
+ if ex.errno != EPERM:
+ raise
+ f = None
+ try:
+ f = open("/proc/sys/net/ipv4/ip_nonlocal_bind")
+ if int(f.read()) != 0:
+ raise GsyncdError(
+ "non-local bind is set and not allowed to create raw sockets, "
+ "cannot determine if %s is local" % host)
+ s = socket.socket(ai[0], socket.SOCK_DGRAM)
+ finally:
+ if f:
+ f.close()
+ try:
+ s.bind(ai[-1])
+ locaddr = True
+ break
+ except:
+ pass
+ s.close()
+ return locaddr
+
+def funcode(f):
+ fc = getattr(f, 'func_code', None)
+ if not fc:
+ # python 3
+ fc = f.__code__
+ return fc
+
+def memoize(f):
+ fc = funcode(f)
+ fn = fc.co_name
+ def ff(self, *a, **kw):
+ rv = getattr(self, '_' + fn, None)
+ if rv == None:
+ rv = f(self, *a, **kw)
+ setattr(self, '_' + fn, rv)
+ return rv
+ return ff
+
+def umask():
+ return os.umask(0)
+
+def entry2pb(e):
+ return e.rsplit('/', 1)
+
+def gauxpfx():
+ return _CL_AUX_GFID_PFX
+
+def md5hex(s):
+ return md5(s).hexdigest()
+
+def selfkill(sig=SIGTERM):
+ os.kill(os.getpid(), sig)
+
+def errno_wrap(call, arg=[], errnos=[], retry_errnos=[ESTALE]):
+ """ wrapper around calls resilient to errnos.
+ retry in case of ESTALE by default.
+ """
+ nr_tries = 0
+ while True:
+ try:
+ return call(*arg)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in errnos:
+ return ex.errno
+ if not ex.errno in retry_errnos:
+ raise
+ nr_tries += 1
+ if nr_tries == GF_OP_RETRIES:
+ # probably a screwed state, cannot do much...
+ logging.warn('reached maximum retries (%s)...' % repr(arg))
+ return
+ time.sleep(0.250) # retry the call
+
+def lstat(e):
+ try:
+ return os.lstat(e)
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return ex.errno
+ else:
+ raise
diff --git a/gf-error-codes.h.template b/gf-error-codes.h.template
new file mode 100644
index 000000000..ab6020d64
--- /dev/null
+++ b/gf-error-codes.h.template
@@ -0,0 +1,33 @@
+/***************************************************************/
+/** **/
+/** DO NOT EDIT THIS FILE **/
+/** THIS IS AUTO-GENERATED FROM LOG BOOK **/
+/** YOUR CHANGES WILL BE LOST IN NEXT BUILD **/
+/** **/
+/***************************************************************/
+
+#ifndef _GF_ERROR_CODES_H
+#define _GF_ERROR_CODES_H
+
+#include <libintl.h>
+
+#define _(STRING) gettext(STRING)
+
+
+/** START: ERROR CODE DEFINITIONS **/
+$DEFINES
+/** END: ERROR CODE DEFINITIONS **/
+
+
+/** START: FUNCTION RETURNS MESSAGE OF GIVEN ERROR CODE **/
+const char *
+_gf_get_message (int code) {
+ switch (code) {
+$CASES
+ default: return NULL;
+ }
+}
+/** END: FUNCTION RETURNS MESSAGE OF GIVEN ERROR CODE **/
+
+
+#endif
diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in
new file mode 100644
index 000000000..fab4a57d5
--- /dev/null
+++ b/glusterfs-api.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+
+Name: glusterfs-api
+Description: GlusterFS API
+/* This is the API version, NOT package version */
+Version: 6
+Libs: -L${libdir} -lgfapi -lglusterfs -lgfrpc -lgfxdr
+Cflags: -I${includedir}/glusterfs -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 6960d8f06..040500c5f 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1,15 +1,15 @@
-# if you make changes, the it is advised to increment this number, and provide
-# a descriptive suffix to identify who owns or what the change represents
-# e.g. release_version 2.MSW
-%global release 1%{?dist}
-%global _sharedstatedir /var/lib
+%global _hardened_build 1
+%global _for_fedora_koji_builds 0
+
+# uncomment and add '%' to use the prereltag for pre-releases
+# global prereltag beta4
# if you wish to compile an rpm without rdma support, compile like this...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without rdma
%{?_without_rdma:%global _without_rdma --disable-ibverbs}
-# No RDMA Support on x390(x)
+# No RDMA Support on s390(x)
%ifarch s390 s390x
%global _without_rdma --disable-ibverbs
%endif
@@ -18,44 +18,147 @@
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without epoll
%{?_without_epoll:%global _without_epoll --disable-epoll}
-# if you wish to compile an rpm with fusermount...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with fusermount
-%{?_with_fusermount:%global _with_fusermount --enable-fusermount}
-
-%global version @PACKAGE_VERSION@
-%if "%{version}" >= "3.2"
-%global _can_georeplicate 1
+# if you wish to compile an rpm without fusermount...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without fusermount
+%{?_without_fusermount:%global _without_fusermount --disable-fusermount}
# if you wish to compile an rpm without geo-replication support, compile like this...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without georeplication
%{?_without_georeplication:%global _without_georeplication --disable-geo-replication}
+
+# if you wish to compile an rpm without the OCF resource agents...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without ocf
+%{?_without_ocf:%global _without_ocf --without-ocf}
+
+# if you wish to build rpms without syslog logging, compile like this
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without syslog
+%{?_without_syslog:%global _without_syslog --disable-syslog}
+
+# disable syslog forcefully as rhel <= 6 doesn't have rsyslog or rsyslog-mmcount
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%global _without_syslog --disable-syslog
%endif
-Summary: Cluster File System
-Name: @PACKAGE_NAME@
-Version: %{version}
-Release: %{release}
-License: GPLv3+
-Group: System Environment/Base
-Vendor: Gluster Inc
-Packager: @PACKAGE_BUGREPORT@
-URL: http://www.gluster.org/docs/index.php/GlusterFS
-Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
-BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
-Requires(post): /sbin/chkconfig
-Requires(preun): /sbin/service, /sbin/chkconfig
+# there is no systemtap support! Perhaps some day there will be
+%global _without_systemtap --enable-systemtap=no
+
+# if you wish to compile an rpm without the BD map support...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without bd
+%{?_without_bd:%global _without_bd --disable-bd-xlator}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 6 )
+%define _without_bd --disable-bd-xlator
+%endif
+
+# if you wish to compile an rpm without the qemu-block support...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without qemu-block
+%{?_without_qemu_block:%global _without_qemu_block --disable-qemu-block}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 6 )
+# xlators/features/qemu-block fails to build on RHEL5, disable it
+%define _without_qemu_block --disable-qemu-block
+%endif
+
+%if ( 0%{?fedora} && 0%{?fedora} > 16 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%global _with_systemd true
+%endif
+
+# From https://fedoraproject.org/wiki/Packaging:Python#Macros
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
+%{!?python_sitearch: %global python_sitearch %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
+%endif
+
+Summary: Cluster File System
+%if ( 0%{_for_fedora_koji_builds} )
+Name: glusterfs
+Version: 3.4.1
+Release: 3%{?prereltag:.%{prereltag}}%{?dist}
+Vendor: Fedora Project
+%else
+Name: @PACKAGE_NAME@
+Version: @PACKAGE_VERSION@
+Release: 1%{?dist}
+Vendor: glusterfs.org
+%endif
+License: GPLv2 or LGPLv3+
+Group: System Environment/Base
+URL: http://www.gluster.org/docs/index.php/GlusterFS
+%if ( 0%{_for_fedora_koji_builds} )
+Source0: http://download.gluster.org/pub/gluster/glusterfs/3.4/%{version}%{?prereltag}/glusterfs-%{version}%{?prereltag}.tar.gz
+Source1: glusterd.sysconfig
+Source2: glusterfsd.sysconfig
+Source3: glusterfs-fuse.logrotate
+Source4: glusterd.logrotate
+Source5: glusterfsd.logrotate
+Source6: rhel5-load-fuse-modules
+Source11: glusterfsd.service
+Source13: glusterfsd.init
+Patch0: %{name}-3.2.5.configure.ac.patch
+Patch1: %{name}-3.3.0.libglusterfs.Makefile.patch
+Patch2: %{name}-3.3.1.rpc.rpcxprt.rdma.name.c.patch
+%else
+Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
+%endif
+
+BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
+
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+BuildRequires: python-simplejson
+%endif
+%if ( 0%{?_with_systemd:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+%global glusterfsd_service %{S:%{SOURCE11}}
+%endif
+BuildRequires: systemd-units
+Requires(post): systemd-units
+Requires(preun): systemd-units
+Requires(postun): systemd-units
+%define _init_enable() /bin/systemctl enable %1.service ;
+%define _init_disable() /bin/systemctl disable %1.service ;
+%define _init_restart() /bin/systemctl try-restart %1.service ;
+%define _init_stop() /bin/systemctl stop %1.service ;
+%define _init_install() %{__install} -D -p -m 0644 %1 %{buildroot}%{_unitdir}/%2.service ;
+# can't seem to make a generic macro that works
+%define _init_glusterd %{_unitdir}/glusterd.service
+%define _init_glusterfsd %{_unitdir}/glusterfsd.service
+%else
+%if ( 0%{_for_fedora_koji_builds} )
+%global glusterfsd_service %{S:%{SOURCE13}}
+%endif
+Requires(post): /sbin/chkconfig
+Requires(preun): /sbin/service
+Requires(preun): /sbin/chkconfig
Requires(postun): /sbin/service
+%define _init_enable() /sbin/chkconfig --add %1 ;
+%define _init_disable() /sbin/chkconfig --del %1 ;
+%define _init_restart() /sbin/service %1 condrestart &>/dev/null ;
+%define _init_stop() /sbin/service %1 stop &>/dev/null ;
+%define _init_install() %{__install} -D -p -m 0755 %1 %{buildroot}%{_sysconfdir}/init.d/%2 ;
+# can't seem to make a generic macro that works
+%define _init_glusterd %{_sysconfdir}/init.d/glusterd
+%define _init_glusterfsd %{_sysconfdir}/init.d/glusterfsd
+%endif
-BuildRequires: bison flex
-BuildRequires: gcc make automake libtool
-BuildRequires: ncurses-devel readline-devel openssl-devel
-BuildRequires: libxml2-devel
-BuildRequires: python-ctypes
-%if 0%{?suse_version}
-BuildRequires: python-devel
+Requires: %{name}-libs = %{version}-%{release}
+BuildRequires: bison flex
+BuildRequires: gcc make automake libtool
+BuildRequires: ncurses-devel readline-devel
+BuildRequires: libxml2-devel openssl-devel
+BuildRequires: libaio-devel
+BuildRequires: python-devel
+BuildRequires: python-ctypes
+%if ( 0%{!?_without_systemtap:1} )
+BuildRequires: systemtap-sdt-devel
+%endif
+%if ( 0%{!?_without_bd:1} )
+BuildRequires: lvm2-devel
+%endif
+%if ( 0%{!?_without_qemu_block:1} )
+BuildRequires: glib2-devel
%endif
-Requires: openssl
+Obsoletes: hekafs <= 0.7
Obsoletes: %{name}-libs <= 2.0.0
Obsoletes: %{name}-common < %{version}-%{release}
Obsoletes: %{name}-core < %{version}-%{release}
@@ -63,45 +166,104 @@ Provides: %{name}-libs = %{version}-%{release}
Provides: %{name}-common = %{version}-%{release}
Provides: %{name}-core = %{version}-%{release}
+# We do not want to generate useless provides and requires for xlator .so files
+# Filter all generated:
+#
+# TODO: RHEL5 does not have a convenient solution
+%if ( 0%{?rhel} == 6 )
+ # filter_setup exists in RHEL6 only
+ %filter_provides_in %{_libdir}/glusterfs/%{version}/
+ %global __filter_from_req %{?__filter_from_req} | %{__grep} -v -P '^(?!lib).*\.so.*$'
+ %filter_setup
+%else
+ # modern rpm and current Fedora do not generate requires if the provides
+ # are filtered
+ %global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$
+%endif
+
+%if ( 0%{?rhel} && 0%{?rhel} < 6 )
+ # _sharedstatedir is not provided by RHEL5
+ %define _sharedstatedir /var/lib
+%endif
+
%description
GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
-This package includes the glusterfs binary, the glusterd daemon and the gluster
-command line, libglusterfs and glusterfs translator modules common to both
-GlusterFS server and client framework.
+This package includes the glusterfs binary, the glusterfsd daemon and the
+gluster command line, libglusterfs and glusterfs translator modules common to
+both GlusterFS server and client framework.
-%if 0%{!?_without_rdma:1}
-%package rdma
-Summary: GlusterFS rdma support for ib-verbs
-Group: Applications/File
-BuildRequires: libibverbs-devel
+%package libs
+Summary: GlusterFS common libraries
+Group: Applications/File
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+Requires: rsyslog-mmjsonparse
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+Requires: rsyslog-mmcount
+%endif
+%endif
+
+%description libs
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the base GlusterFS libraries
+
+%package cli
+Summary: GlusterFS CLI
+Group: Applications/File
+Requires: %{name}-libs = %{version}-%{release}
+
+%description cli
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
-Requires: %{name} = %{version}-%{release}
+This package provides the GlusterFS CLI application and its man page
+
+%if ( 0%{!?_without_rdma:1} )
+%package rdma
+Summary: GlusterFS rdma support for ib-verbs
+Group: Applications/File
+BuildRequires: libibverbs-devel
+BuildRequires: librdmacm-devel
+Requires: %{name} = %{version}-%{release}
%description rdma
GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
This package provides support to ib-verbs library.
%endif
-%if 0%{?_can_georeplicate}
-%if 0%{!?_without_georeplication:1}
+%if ( 0%{!?_without_georeplication:1} )
%package geo-replication
-Summary: GlusterFS Geo-replication
-Group: Applications/File
-Requires: %{name} = %{version}-%{release} , python-ctypes , rsync >= 3.0.0
+Summary: GlusterFS Geo-replication
+Group: Applications/File
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-server = %{version}-%{release}
%description geo-replication
GlusterFS is a clustered file-system capable of scaling to several
@@ -114,25 +276,25 @@ is in userspace and easily manageable.
This package provides support to geo-replication.
%endif
-%endif
%package fuse
-Summary: GlusterFS Fuse client
-Group: Applications/File
+Summary: Fuse client
+Group: Applications/File
+BuildRequires: fuse-devel
-Requires: %{name} >= %{version}-%{release}
+Requires: %{name} = %{version}-%{release}
Obsoletes: %{name}-client < %{version}-%{release}
Provides: %{name}-client = %{version}-%{release}
%description fuse
GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
This package provides support to FUSE based clients.
@@ -140,8 +302,14 @@ This package provides support to FUSE based clients.
Summary: Clustered file-system server
Group: System Environment/Daemons
Requires: %{name} = %{version}-%{release}
+Requires: %{name}-cli = %{version}-%{release}
+Requires: %{name}-libs = %{version}-%{release}
Requires: %{name}-fuse = %{version}-%{release}
-Requires: openssl
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+Requires: rpcbind
+%else
+Requires: portmap
+%endif
%description server
GlusterFS is a clustered file-system capable of scaling to several
@@ -154,10 +322,62 @@ is in user space and easily manageable.
This package provides the glusterfs server daemon.
+%package api
+Summary: Clustered file-system api library
+Group: System Environment/Daemons
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-devel = %{version}-%{release}
+# we provide the Python package/namespace 'gluster'
+Provides: python-gluster = %{version}-%{release}
+
+%description api
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the glusterfs libgfapi library.
+
+%if ( 0%{!?_without_ocf:1} )
+%package resource-agents
+Summary: OCF Resource Agents for GlusterFS
+License: GPLv3+
+%if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 ) )
+# EL5 does not support noarch sub-packages
+BuildArch: noarch
+%endif
+# this Group handling comes from the Fedora resource-agents package
+%if ( 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} )
+Group: System Environment/Base
+%else
+Group: Productivity/Clustering/HA
+%endif
+# for glusterd
+Requires: glusterfs-server
+# depending on the distribution, we need pacemaker or resource-agents
+Requires: %{_prefix}/lib/ocf/resource.d
+
+%description resource-agents
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the resource agents which plug glusterd into
+Open Cluster Framework (OCF) compliant cluster resource managers,
+like Pacemaker.
+%endif
+
%package devel
-Summary: Development Libraries
-Group: Development/Libraries
-Requires: %{name} = %{version}-%{release}
+Summary: Development Libraries
+Group: Development/Libraries
+Requires: %{name} = %{version}-%{release}
%description devel
GlusterFS is a clustered file-system capable of scaling to several
@@ -168,25 +388,81 @@ terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
is in user space and easily manageable.
-This package provides the development libraries.
+This package provides the development libraries and include files.
+
+%package api-devel
+Summary: Development Libraries
+Group: Development/Libraries
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-devel = %{version}-%{release}
+
+%description api-devel
+GlusterFS is a clustered file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
+or TCP/IP interconnect into one large parallel network file
+system. GlusterFS is one of the most sophisticated file systems in
+terms of features and extensibility. It borrows a powerful concept
+called Translators from GNU Hurd kernel. Much of the code in GlusterFS
+is in user space and easily manageable.
+
+This package provides the api include files.
+
+%package regression-tests
+Summary: Development Tools
+Group: Development/Tools
+Requires: %{name} = %{version}-%{release}
+Requires: %{name}-fuse = %{version}-%{release}
+Requires: %{name}-server = %{version}-%{release}
+Requires: perl(App::Prove) perl(Test::Harness) gcc util-linux-ng lvm2
+Requires: python attr dbench git nfs-utils xfsprogs
+
+%description regression-tests
+The Gluster Test Framework is a suite of scripts used for
+regression testing of Gluster.
%prep
-%setup -q -n %{name}-%{version}
+%setup -q -n %{name}-%{version}%{?prereltag}
+%if ( 0%{_for_fedora_koji_builds} )
+#%patch0 -p0
+%patch1 -p0 -F4
+%if ( "%{version}" == "3.3.1" )
+%patch2 -p1
+%endif
+%endif
%build
./autogen.sh
-%configure %{?_without_rdma} %{?_without_epoll} %{?_with_fusermount} %{?_without_georeplication}
-
-# Remove rpath
-sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
-sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
+%configure \
+ %{?_without_rdma} \
+ %{?_without_epoll} \
+ %{?_without_fusermount} \
+ %{?_without_georeplication} \
+ %{?_without_ocf} \
+ %{?_without_syslog} \
+ %{?_without_bd} \
+ %{?_without_qemu_block} \
+ %{?_without_systemtap}
+
+# fix hardening and remove rpath in shlibs
+%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%{__sed} -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
+%endif
+%{__sed} -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
+%{__sed} -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
%{__make} %{?_smp_mflags}
+pushd api/examples
+FLAGS="$RPM_OPT_FLAGS" %{__python} setup.py build
+popd
%install
%{__rm} -rf %{buildroot}
%{__make} install DESTDIR=%{buildroot}
+# install the gfapi Python library in /usr/lib/python*/site-packages
+pushd api/examples
+%{__python} setup.py install --skip-build --verbose --root %{buildroot}
+popd
# Install include directory
%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs
%{__install} -p -m 0644 libglusterfs/src/*.h \
@@ -202,19 +478,45 @@ sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/server
%{__install} -p -m 0644 xlators/protocol/server/src/*.h \
%{buildroot}%{_includedir}/glusterfs/server/
+%if ( 0%{_for_fedora_koji_builds} )
+%{__install} -D -p -m 0644 %{SOURCE1} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%{__install} -D -p -m 0644 %{SOURCE2} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
+%else
+%{__install} -D -p -m 0644 extras/glusterd-sysconfig \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%endif
+%if ( 0%{_for_fedora_koji_builds} )
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+%{__install} -D -p -m 0755 %{SOURCE6} \
+ %{buildroot}%{_sysconfdir}/sysconfig/modules/glusterfs-fuse.modules
+%endif
+%endif
+
+%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterd
+%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfs
+%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfsd
+%{__mkdir_p} %{buildroot}%{_localstatedir}/run/gluster
# Remove unwanted files from all the shared libraries
find %{buildroot}%{_libdir} -name '*.a' -delete
find %{buildroot}%{_libdir} -name '*.la' -delete
-# Remove installed docs, we include them ourselves as %%doc
+# Remove installed docs, they're included by %%doc
%{__rm} -rf %{buildroot}%{_datadir}/doc/glusterfs/
+head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog
+cat << EOM >> ChangeLog
-# Rename the samples, so we can include them as %%config
-#for file in %{buildroot}%{_sysconfdir}/glusterfs/*.sample; do
-# %{__mv} ${file} `dirname ${file}`/`basename ${file} .sample`
-#done
+More commit messages for this ChangeLog can be found at
+https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prereltag}
+EOM
+
+# Remove benchmarking and other unpackaged files
+%{__rm} -rf %{buildroot}/benchmarking
+%{__rm} -f %{buildroot}/glusterfs-mode.el
+%{__rm} -f %{buildroot}/glusterfs.vim
# Create working directory
%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd
@@ -223,149 +525,430 @@ find %{buildroot}%{_libdir} -name '*.la' -delete
sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \
%{buildroot}%{_sysconfdir}/glusterfs/glusterd.vol
-# Following needed by the hooks interface
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hook-scripts
-%{__install} -p -m 0644 extras/hook-scripts/*.sh \
- %{buildroot}%{_sharedstatedir}/glusterd/hook-scripts/
+# Install glusterfsd .service or init.d file
+%if ( 0%{_for_fedora_koji_builds} )
+%_init_install %{glusterfsd_service} glusterfsd
+%endif
-# Clean up the examples we want to include as %%doc
-#%{__cp} -a doc/examples examples
-#%{__rm} -f examples/Makefile*
+%if ( 0%{_for_fedora_koji_builds} )
+# Client logrotate entry
+%{__install} -D -p -m 0644 %{SOURCE3} \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-fuse
+
+# Server logrotate entry
+%{__install} -D -p -m 0644 %{SOURCE4} \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterd
+# Legacy server logrotate entry
+%{__install} -D -p -m 0644 %{SOURCE5} \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfsd
+%else
+%{__install} -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+# geo-rep ghosts
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+%{__install} -D -p -m 0644 extras/glusterfs-georep-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+%endif
+
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%{__install} -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \
+ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+%endif
+
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+%{__install} -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \
+ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%{__install} -D -p -m 0644 extras/logger.conf.example \
+ %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example
+%endif
+%endif
+
+# the rest of the ghosts
+touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+touch %{buildroot}%{_sharedstatedir}/glusterd/options
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/glustershd
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/peers
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/vols
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/groups
+%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/nfs/run
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+
+find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
%clean
%{__rm} -rf %{buildroot}
%post
/sbin/ldconfig
-
-# Copy the 'glusterfs-logrotate' file at the right place
-if [ -d /etc/logrotate.d ]; then
- cp %{_docdir}/%{name}-%{version}/glusterfs-logrotate /etc/logrotate.d/glusterfs
-fi
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%_init_restart rsyslog
+%endif
+%endif
%postun
/sbin/ldconfig
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%_init_restart rsyslog
+%endif
+%endif
%files
-%defattr(-,root,root)
-%doc AUTHORS ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL NEWS README THANKS
-%doc extras/glusterfs-logrotate
+%defattr(-,root,root,-)
+%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README THANKS
+%config(noreplace) %{_sysconfdir}/logrotate.d/*
+%config(noreplace) %{_sysconfdir}/sysconfig/*
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%{_sysconfdir}/rsyslog.d/gluster.conf.example
+%endif
+%endif
%{_libdir}/glusterfs
-%{_libdir}/*.so.*
%{_sbindir}/glusterfs*
%{_mandir}/man8/*gluster*.8*
+%exclude %{_mandir}/man8/gluster.8*
%dir %{_localstatedir}/log/glusterfs
-%if 0%{!?_without_rdma:1}
-%exclude %{_libdir}/glusterfs/%{version}/rpc-transport/rdma*
+%dir %{_localstatedir}/run/gluster
+%dir %{_sharedstatedir}/glusterd
+%if ( 0%{!?_without_rdma:1} )
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
%endif
-%exclude %{_libdir}/glusterfs/%{version}/xlator/mount/fuse*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/storage*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/features/posix*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/protocol/server*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/mgmt*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/nfs*
+# server-side, etc., xlators in other RPMs
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs*
+# sample xlators not generally used or usable
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/encryption/rot-13*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/mac-compat*
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/testing/performance/symlink-cache*
+
+%post libs
+/sbin/ldconfig
+
+%postun libs
+/sbin/ldconfig
-%if 0%{!?_without_rdma:1}
+%files libs
+%{_libdir}/*.so.*
+%exclude %{_libdir}/libgfapi.*
+
+%files cli
+%{_sbindir}/gluster
+%{_mandir}/man8/gluster.8*
+
+%if ( 0%{!?_without_rdma:1} )
%files rdma
-%defattr(-,root,root)
-%{_libdir}/glusterfs/%{version}/rpc-transport/rdma*
+%defattr(-,root,root,-)
+%{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma*
%endif
-%if 0%{?_can_georeplicate}
-%if 0%{!?_without_georeplication:1}
+%if ( 0%{!?_without_georeplication:1} )
%post geo-replication
#restart glusterd.
-%{_sysconfdir}/init.d/glusterd restart &> /dev/null
-%endif
+if [ $1 -ge 1 ]; then
+ %_init_restart glusterd
+fi
-%if 0%{!?_without_georeplication:1}
%files geo-replication
%defattr(-,root,root)
%{_libexecdir}/glusterfs/gsyncd
%{_libexecdir}/glusterfs/python/syncdaemon/*
-%endif
+%{_libexecdir}/glusterfs/gverify.sh
+%{_libexecdir}/glusterfs/peer_add_secret_pub
+%{_libexecdir}/glusterfs/peer_gsec_create
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/geo-replication
+%ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
%endif
%files fuse
-%defattr(-,root,root)
-%{_libdir}/glusterfs/%{version}/xlator/mount/fuse*
-%{_mandir}/man8/mount.glusterfs.8*
+%defattr(-,root,root,-)
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-fuse
+%endif
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse*
/sbin/mount.glusterfs
-%if 0%{?_with_fusermount:1}
+%if ( 0%{!?_without_fusermount:1} )
%{_bindir}/fusermount-glusterfs
%endif
+%if ( 0%{_for_fedora_koji_builds} )
+%if ( 0%{?rhel} && 0%{?rhel} <= 5 )
+%{_sysconfdir}/sysconfig/modules/glusterfs-fuse.modules
+%endif
+%endif
-%post server
-/sbin/chkconfig --add glusterd
+%files server
+%defattr(-,root,root,-)
+%doc extras/clear_xattrs.sh
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterd
+%endif
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterd
+%config(noreplace) %{_sysconfdir}/glusterfs
+# Legacy configs
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfsd
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd
+%endif
+# init files
+%_init_glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%_init_glusterfsd
+%endif
+# binaries
+%{_sbindir}/glusterd
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs*
+%ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
+# This is really ugly, but I have no idea how to mark these directories in any
+# other way. They should belong to the glusterfs-server package, but don't
+# exist after installation. They are generated on the first start...
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/vols
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+
+%post api
+/sbin/ldconfig
+
+%postun api
+/sbin/ldconfig
+
+%files api
+%exclude %{_libdir}/*.so
+%{_libdir}/libgfapi.*
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api*
+%{python_sitelib}/*
+
+%if ( 0%{!?_without_ocf:1} )
+%files resource-agents
+%defattr(-,root,root)
+# /usr/lib is the standard for OCF, also on x86_64
+%{_prefix}/lib/ocf/resource.d/glusterfs
+%endif
+
+%files devel
+%defattr(-,root,root,-)
+%{_includedir}/glusterfs
+%exclude %{_includedir}/glusterfs/y.tab.h
+%exclude %{_includedir}/glusterfs/api
+%exclude %{_libdir}/libgfapi.so
+%{_libdir}/*.so
-# Move legacy sysconf files to the correct sysconfdir
-if [ -d /etc/glusterd ]; then
- cp -a /etc/glusterd /var/lib/
- mv /etc/glusterd /etc/glusterd.rpmsave
+%files api-devel
+%{_libdir}/pkgconfig/glusterfs-api.pc
+%{_libdir}/pkgconfig/libgfchangelog.pc
+%{_libdir}/libgfapi.so
+%{_includedir}/glusterfs/api/*
+
+%files regression-tests
+%defattr(-,root,root,-)
+%{_prefix}/share/glusterfs/*
+%exclude %{_prefix}/share/glusterfs/tests/basic/rpm.t
+
+%post server
+# Legacy server
+%_init_enable glusterd
+%_init_enable glusterfsd
+
+# Genuine Fedora (and EPEL) builds never put gluster files in /etc; if
+# there are any files in /etc from a prior gluster.org install, move them
+# to /var/lib. (N.B. Starting with 3.3.0 all gluster files are in /var/lib
+# in gluster.org RPMs.) Be careful to copy them on the off chance that
+# /etc and /var/lib are on separate file systems
+if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then
+ %{__mkdir_p} %{_sharedstatedir}/glusterd
+ cp -a /etc/glusterd %{_sharedstatedir}/glusterd
+ rm -rf /etc/glusterd
+ ln -sf %{_sharedstatedir}/glusterd /etc/glusterd
fi
-if [ -d /var/lib/glusterd/vols ]; then
- # Rename old volfiles in an RPM-standard way. These aren't actually
- # considered package config files, so %config doesn't work for them.
- for file in $(find /var/lib/glusterd/vols -name '*.vol'); do
+# Rename old volfiles in an RPM-standard way. These aren't actually
+# considered package config files, so %config doesn't work for them.
+if [ -d %{_sharedstatedir}/glusterd/vols ]; then
+ for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do
newfile=${file}.rpmsave
echo "warning: ${file} saved as ${newfile}"
cp ${file} ${newfile}
done
fi
+# add marker translator
+# but first make certain that there are no old libs around to bite us
+# BZ 834847
+if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then
+ rm -f /etc/ld.so.conf.d/glusterfs.conf
+ /sbin/ldconfig
+fi
pidof -c -o %PPID -x glusterd &> /dev/null
if [ $? -eq 0 ]; then
kill -9 `pgrep -f gsyncd.py` &> /dev/null
killall glusterd &> /dev/null
- #add marker translator
- glusterd --xlator-option *.upgrade=on
+ glusterd --xlator-option *.upgrade=on -N
else
- glusterd --xlator-option *.upgrade=on
- sleep 10
- killall glusterd &> /dev/null
+ glusterd --xlator-option *.upgrade=on -N
fi
%preun server
if [ $1 -eq 0 ]; then
- /sbin/service glusterd stop &>/dev/null || :
- /sbin/chkconfig --del glusterd
+ if [ -f %_init_glusterfsd ]; then
+ %_init_stop glusterfsd
+ fi
+ %_init_stop glusterd
+ if [ -f %_init_glusterfsd ]; then
+ %_init_disable glusterfsd
+ fi
+ %_init_disable glusterd
fi
if [ $1 -ge 1 ]; then
- /sbin/service glusterd condrestart &>/dev/null || :
+ if [ -f %_init_glusterfsd ]; then
+ %_init_restart glusterfsd
+ fi
+ %_init_restart glusterd
fi
-# Legacy server
-if [ $1 -eq 0 ]; then
- /sbin/service glusterfsd stop &>/dev/null || :
- /sbin/chkconfig --del glusterfsd
-fi
-if [ $1 -ge 1 ]; then
- /sbin/service glusterfsd condrestart &>/dev/null || :
-fi
+%changelog
+* Fri Oct 11 2013 Harshavardhana <fharshav@redhat.com>
+- Add '_sharedstatedir' macro to `/var/lib` on <= RHEL5 (#1003184)
-%files server
-%defattr(-,root,root,-)
-#%doc examples/ doc/glusterfs*.vol.sample
-%config(noreplace) %{_sysconfdir}/glusterfs
-%{_sharedstatedir}/glusterd
-%{_sysconfdir}/init.d/glusterd
-%{_sbindir}/gluster
-%{_sbindir}/glusterd
-%{_libdir}/glusterfs/%{version}/xlator/storage*
-%{_libdir}/glusterfs/%{version}/xlator/features/posix*
-%{_libdir}/glusterfs/%{version}/xlator/protocol/server*
-%{_libdir}/glusterfs/%{version}/xlator/mgmt*
-%{_libdir}/glusterfs/%{version}/xlator/nfs*
+* Wed Oct 9 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec 3.4.1-2+
-%files devel
-%defattr(-,root,root,-)
-%{_includedir}/glusterfs
-%exclude %{_includedir}/glusterfs/y.tab.h
-%{_libdir}/*.so
+* Wed Oct 9 2013 Niels de Vos <ndevos@redhat.com>
+- glusterfs-api-devel requires glusterfs-devel (#1016938, #1017094)
+
+* Mon Sep 30 2013 Niels de Vos <ndevos@redhat.com>
+- Package gfapi.py into the Python site-packages path (#1005146)
+
+* Tue Sep 17 2013 Harshavardhana <fharshav@redhat.com>
+- Provide a new package called "glusterfs-regression-tests" for standalone
+ regression testing.
+
+* Thu Aug 22 2013 Niels de Vos <ndevos@redhat.com>
+- Correct the day/date for some entries in this changelog (#1000019)
+
+* Wed Aug 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+- add Requires
+- add -cli subpackage
+- fix other minor differences with Fedora glusterfs.spec
+
+* Tue Jul 30 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, add glusterfs-libs RPM for oVirt/qemu-kvm
+
+* Thu Jul 25 2013 Csaba Henk <csaba@redhat.com>
+- Added peer_add_secret_pub and peer_gsec_create to %{_libexecdir}/glusterfs
+
+* Thu Jul 25 2013 Aravinda VK <avishwan@redhat.com>
+- Added gverify.sh to %{_libexecdir}/glusterfs directory.
+
+* Thu Jul 25 2013 Harshavardhana <fharshav@redhat.com>
+- Allow to build with '--without bd' to disable 'bd' xlator
+
+* Thu Jun 27 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- fix the hardening fix for shlibs, use %%{__sed} macro, shorter ChangeLog
+
+* Wed Jun 26 2013 Niels de Vos <ndevos@redhat.com>
+- move the mount/api xlator to glusterfs-api
+
+* Fri Jun 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, remove G4S/UFO and Swift
+
+* Mon Mar 4 2013 Niels de Vos <ndevos@redhat.com>
+- Package /var/run/gluster so that statedumps can be created
+
+* Wed Feb 6 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+
+* Tue Dec 11 2012 Filip Pytloun <filip.pytloun@gooddata.com>
+- add sysconfig file
+
+* Thu Oct 25 2012 Niels de Vos <ndevos@redhat.com>
+- Add a sub-package for the OCF resource agents
+
+* Wed Sep 05 2012 Niels de Vos <ndevos@redhat.com>
+- Don't use python-ctypes on SLES (from Jörg Petersen)
+
+* Tue Jul 10 2012 Niels de Vos <ndevos@redhat.com>
+- Include extras/clear_xattrs.sh in the glusterfs-server sub-package
+
+* Thu Jun 07 2012 Niels de Vos <ndevos@redhat.com>
+- Mark /var/lib/glusterd as owned by glusterfs, subdirs belong to -server
-%changelog
* Wed May 9 2012 Kaleb S. KEITHLEY <kkeithle[at]redhat.com>
- Add BuildRequires: libxml2-devel so that configure will DTRT on for
- Fedora's Koji build system
@@ -385,7 +968,7 @@ fi
- Fixed version reporting 3.2git
- Added nfs init script (disabled by default)
-* Fri Sep 1 2011 Joe Julian <me@joejulian.name> - 3.2.3-1
+* Thu Sep 1 2011 Joe Julian <me@joejulian.name> - 3.2.3-1
- Update to 3.2.3
* Tue Jul 19 2011 Joe Julian <me@joejulian.name> - 3.2.2-3
@@ -400,13 +983,13 @@ fi
* Wed Jul 13 2011 Joe Julian <me@joejulian.name> - 3.2.1-2
- fix hardcoded path to gsyncd in source to match the actual file location
-* Mon Jun 21 2011 Joe Julian <me@joejulian.name> - 3.2.1
+* Tue Jun 21 2011 Joe Julian <me@joejulian.name> - 3.2.1
- Update to 3.2.1
* Mon Jun 20 2011 Joe Julian <me@joejulian.name> - 3.1.5
- Update to 3.1.5
-* Mon May 31 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.4
+* Tue May 31 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.4
- Current git
* Sun May 29 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.2
@@ -419,7 +1002,7 @@ fi
- Add patch to remove forced 64 bit compile
- Obsolete glusterfs-core to allow for upgrading from gluster packaging
-* Sun Mar 19 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.3-1
+* Sat Mar 19 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.3-1
- Update to 3.1.3
- Merge in more upstream SPEC changes
- Remove patches from GlusterFS bugzilla #2309 and #2311
@@ -428,7 +1011,7 @@ fi
* Sun Feb 06 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-3
- Add back in legacy SPEC elements to support older branches
-* Tue Feb 03 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-2
+* Thu Feb 03 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-2
- Add patches from CloudFS project
* Tue Jan 25 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-1
@@ -464,7 +1047,7 @@ fi
* Sat Jan 2 2010 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.9-1
- Update to 2.0.9
-* Sat Nov 8 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.8-1
+* Sun Nov 8 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.8-1
- Update to 2.0.8
- Remove install of glusterfs-volgen, it's properly added to
automake upstream now
@@ -532,10 +1115,10 @@ fi
* Fri May 9 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-1
- Update to 1.3.8 final.
-* Tue Apr 23 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.10
+* Wed Apr 23 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.10
- Include short patch to include fixes from latest TLA 751.
-* Mon Apr 22 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.9
+* Tue Apr 22 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.9
- Update to 1.3.8pre6.
- Include glusterfs binary in both the client and server packages, now that
glusterfsd is a symlink to it instead of a separate binary.
@@ -579,7 +1162,7 @@ fi
* Wed Nov 21 2007 Matthias Saou <http://freshrpms.net/> 1.3.7-1
- Major spec file cleanup.
-- Add misssing %%clean section.
+- Add missing %%clean section.
- Fix ldconfig calls (weren't set for the proper sub-package).
* Sat Aug 4 2007 Matt Paine <matt@mattsoftware.com> - 1.3.pre7
diff --git a/glusterfsd/src/Makefile.am b/glusterfsd/src/Makefile.am
index 17d7a4a81..05a10dee3 100644
--- a/glusterfsd/src/Makefile.am
+++ b/glusterfsd/src/Makefile.am
@@ -4,15 +4,17 @@ glusterfsd_SOURCES = glusterfsd.c glusterfsd-mgmt.c
glusterfsd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
$(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(GF_LDADD)
-glusterfsd_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS)
+ $(GF_LDADD) $(GF_GLUSTERFS_LIBS)
+glusterfsd_LDFLAGS = $(GF_LDFLAGS)
noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS) \
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
-I$(top_srcdir)/rpc/rpc-lib/src -I$(top_srcdir)/rpc/xdr/src
+AM_CFLAGS = -Wall $(GF_GLUSTERFS_CFLAGS)
+
CLEANFILES =
$(top_builddir)/libglusterfs/src/libglusterfs.la:
@@ -24,7 +26,9 @@ uninstall-local:
install-data-local:
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run
+ $(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run/gluster
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/log/glusterfs
+ $(INSTALL) -d -m 755 $(DESTDIR)$(sbindir)
rm -f $(DESTDIR)$(sbindir)/glusterfs
rm -f $(DESTDIR)$(sbindir)/glusterd
ln -s glusterfsd $(DESTDIR)$(sbindir)/glusterfs
diff --git a/glusterfsd/src/glusterfsd-mem-types.h b/glusterfsd/src/glusterfsd-mem-types.h
index a28a7b2e3..7135c0ada 100644
--- a/glusterfsd/src/glusterfsd-mem-types.h
+++ b/glusterfsd/src/glusterfsd-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERFSD_MEM_TYPES_H__
#define __GLUSTERFSD_MEM_TYPES_H__
@@ -27,10 +17,10 @@
enum gfd_mem_types_ {
gfd_mt_xlator_list_t = GF_MEM_TYPE_START,
gfd_mt_xlator_t,
+ gfd_mt_server_cmdline_t,
gfd_mt_xlator_cmdline_option_t,
gfd_mt_char,
gfd_mt_call_pool_t,
- gfd_mt_vol_top_priv_t,
gfd_mt_end
};
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index d8582aa70..1c9220927 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -1,28 +1,17 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
-#include <pthread.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -42,25 +31,28 @@
#include "xdr-generic.h"
#include "glusterfsd.h"
-#include "glusterfsd-mem-types.h"
#include "rpcsvc.h"
#include "cli1-xdr.h"
#include "statedump.h"
#include "syncop.h"
+#include "xlator.h"
-static char is_mgmt_rpc_reconnect;
+static gf_boolean_t is_mgmt_rpc_reconnect = _gf_false;
int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
int glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp);
int glusterfs_graph_unknown_options (glusterfs_graph_t *graph);
+int emancipate(glusterfs_ctx_t *ctx, int ret);
int
-mgmt_cbk_spec (void *data)
+mgmt_cbk_spec (struct rpc_clnt *rpc, void *mydata, void *data)
{
glusterfs_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
- ctx = glusterfs_ctx_get ();
+ this = mydata;
+ ctx = glusterfsd_ctx;
gf_log ("mgmt", GF_LOG_INFO, "Volume file changed");
glusterfs_volfile_fetch (ctx);
@@ -69,10 +61,11 @@ mgmt_cbk_spec (void *data)
int
-mgmt_cbk_event (void *data)
+mgmt_cbk_event (struct rpc_clnt *rpc, void *mydata, void *data)
{
return 0;
}
+
struct iobuf *
glusterfs_serialize_reply (rpcsvc_request_t *req, void *arg,
struct iovec *outmsg, xdrproc_t xdrproc)
@@ -141,11 +134,10 @@ glusterfs_submit_reply (rpcsvc_request_t *req, void *arg,
iob = glusterfs_serialize_reply (req, arg, &rsp, xdrproc);
if (!iob) {
gf_log_callingfn (THIS->name, GF_LOG_ERROR, "Failed to serialize reply");
- goto out;
+ } else {
+ iobref_add (iobref, iob);
}
- iobref_add (iobref, iob);
-
ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
iobref);
@@ -183,15 +175,14 @@ glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret)
if (dict)
ret = dict_allocate_and_serialize (dict, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
+ &rsp.output.output_len);
if (ret == 0)
ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
(xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
+ GF_FREE (rsp.output.output_val);
if (dict)
dict_unref (dict);
return ret;
@@ -225,80 +216,20 @@ glusterfs_translator_info_response_send (rpcsvc_request_t *req, int ret,
if (output) {
ret = dict_allocate_and_serialize (output,
&rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
+ &rsp.output.output_len);
}
if (!ret)
free_ptr = _gf_true;
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
if (free_ptr)
GF_FREE (rsp.output.output_val);
return ret;
}
int
-glusterfs_handle_translator_info_get_cont (gfd_vol_top_priv_t *priv)
-{
- int ret = -1;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- glusterfs_graph_t *active = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char msg[2048] = {0,};
- dict_t *output = NULL;
- dict_t *dict = NULL;
-
- GF_ASSERT (priv);
-
- dict = dict_new ();
- ret = dict_unserialize (priv->xlator_req.input.input_val,
- priv->xlator_req.input.input_len, &dict);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to unserialize dict");
- goto cont;
- }
- ret = dict_set_double (dict, "time", priv->time);
- if (ret)
- goto cont;
- ret = dict_set_double (dict, "throughput", priv->throughput);
- if (ret)
- goto cont;
-
-cont:
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- active = ctx->active;
- any = active->first;
-
- xlator = xlator_search_by_name (any, priv->xlator_req.name);
- if (!xlator) {
- snprintf (msg, sizeof (msg), "xlator %s is not loaded",
- priv->xlator_req.name);
- goto out;
- }
-
- output = dict_new ();
- ret = xlator->notify (xlator, GF_EVENT_TRANSLATOR_INFO, dict, output);
-
-out:
- ret = glusterfs_translator_info_response_send (priv->req, ret,
- msg, output);
-
- if (priv->xlator_req.name)
- free (priv->xlator_req.name);
- if (priv->xlator_req.input.input_val)
- free (priv->xlator_req.input.input_val);
- if (dict)
- dict_unref (dict);
- if (output)
- dict_unref (output);
- GF_FREE (priv);
-
- return ret;
-}
-
-int
glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret,
char *msg, dict_t *output)
{
@@ -317,7 +248,7 @@ glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret,
if (output) {
ret = dict_allocate_and_serialize (output,
&rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
+ &rsp.output.output_len);
}
if (!ret)
free_ptr = _gf_true;
@@ -334,28 +265,35 @@ glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret,
int
glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *dict = NULL;
- xlator_t *this = NULL;
- gf1_cli_top_op top_op = 0;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- gfd_vol_top_priv_t *priv = NULL;
- pthread_t tid = -1;
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ gf1_cli_top_op top_op = 0;
+ uint32_t blk_size = 0;
+ uint32_t blk_count = 0;
+ double time = 0;
+ double throughput = 0;
+ xlator_t *any = NULL;
+ xlator_t *xlator = NULL;
+ glusterfs_graph_t *active = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char msg[2048] = {0,};
+ dict_t *output = NULL;
GF_ASSERT (req);
this = THIS;
GF_ASSERT (this);
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
goto out;
}
- dict = dict_new ();
+ dict = dict_new ();
ret = dict_unserialize (xlator_req.input.input_val,
xlator_req.input.input_len,
&dict);
@@ -366,14 +304,6 @@ glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
goto out;
}
- priv = GF_MALLOC (sizeof (gfd_vol_top_priv_t), gfd_mt_vol_top_priv_t);
- if (!priv) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to allocate memory");
- goto out;
- }
- priv->xlator_req = xlator_req;
- priv->req = req;
-
ret = dict_get_int32 (dict, "top-op", (int32_t *)&top_op);
if ((!ret) && (GF_CLI_TOP_READ_PERF == top_op ||
GF_CLI_TOP_WRITE_PERF == top_op)) {
@@ -383,61 +313,73 @@ glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
ret = dict_get_uint32 (dict, "blk-cnt", &blk_count);
if (ret)
goto cont;
- priv->blk_size = blk_size;
- priv->blk_count = blk_count;
+
if (GF_CLI_TOP_READ_PERF == top_op) {
- ret = pthread_create (&tid, NULL,
- glusterfs_volume_top_read_perf,
- priv);
+ ret = glusterfs_volume_top_read_perf
+ (blk_size, blk_count, xlator_req.name,
+ &throughput, &time);
} else if ( GF_CLI_TOP_WRITE_PERF == top_op) {
- ret = pthread_create (&tid, NULL,
- glusterfs_volume_top_write_perf,
- priv);
+ ret = glusterfs_volume_top_write_perf
+ (blk_size, blk_count, xlator_req.name,
+ &throughput, &time);
}
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Thread create failed");
+ ret = dict_set_double (dict, "time", time);
+ if (ret)
+ goto cont;
+ ret = dict_set_double (dict, "throughput", throughput);
+ if (ret)
goto cont;
- }
- gf_log ("glusterd", GF_LOG_DEBUG, "Created new thread with "
- "tid %u", (unsigned int)tid);
- goto out;
}
cont:
- priv->throughput = 0;
- priv->time = 0;
- ret = glusterfs_handle_translator_info_get_cont (priv);
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+ active = ctx->active;
+ any = active->first;
+
+ xlator = xlator_search_by_name (any, xlator_req.name);
+ if (!xlator) {
+ snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+ xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new ();
+ ret = xlator->notify (xlator, GF_EVENT_TRANSLATOR_INFO, dict, output);
+
out:
+ ret = glusterfs_translator_info_response_send (req, ret, msg, output);
+
+ free (xlator_req.name);
+ free (xlator_req.input.input_val);
+ if (output)
+ dict_unref (output);
if (dict)
dict_unref (dict);
return ret;
}
-void *
-glusterfs_volume_top_write_perf (void *args)
+int
+glusterfs_volume_top_write_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time)
{
int32_t fd = -1;
int32_t input_fd = -1;
char export_path[PATH_MAX];
char *buf = NULL;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
int32_t iter = 0;
int32_t ret = -1;
uint64_t total_blks = 0;
struct timeval begin, end = {0,};
- double throughput = 0;
- double time = 0;
- gfd_vol_top_priv_t *priv = NULL;
-
- GF_ASSERT (args);
- priv = (gfd_vol_top_priv_t *)args;
- blk_size = priv->blk_size;
- blk_count = priv->blk_count;
+ GF_ASSERT (brick_path);
+ GF_ASSERT (throughput);
+ GF_ASSERT (time);
+ if (!(blk_size > 0) || ! (blk_count > 0))
+ goto out;
snprintf (export_path, sizeof (export_path), "%s/%s",
- priv->xlator_req.name, ".gf-tmp-stats-perf");
+ brick_path, ".gf-tmp-stats-perf");
fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
if (-1 == fd) {
@@ -481,55 +423,46 @@ glusterfs_volume_top_write_perf (void *args)
}
gettimeofday (&end, NULL);
- time = (end.tv_sec - begin.tv_sec) * 1e6
+ *time = (end.tv_sec - begin.tv_sec) * 1e6
+ (end.tv_usec - begin.tv_usec);
- throughput = total_blks / time;
+ *throughput = total_blks / *time;
gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
- "bytes written %"PRId64, throughput, time, total_blks);
+ "bytes written %"PRId64, *throughput, *time, total_blks);
out:
- priv->throughput = throughput;
- priv->time = time;
-
if (fd >= 0)
close (fd);
if (input_fd >= 0)
close (input_fd);
- if (buf)
- GF_FREE (buf);
+ GF_FREE (buf);
unlink (export_path);
- (void)glusterfs_handle_translator_info_get_cont (priv);
-
- return NULL;
+ return ret;
}
-void *
-glusterfs_volume_top_read_perf (void *args)
+int
+glusterfs_volume_top_read_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time)
{
int32_t fd = -1;
int32_t input_fd = -1;
int32_t output_fd = -1;
char export_path[PATH_MAX];
char *buf = NULL;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
int32_t iter = 0;
int32_t ret = -1;
uint64_t total_blks = 0;
struct timeval begin, end = {0,};
- double throughput = 0;
- double time = 0;
- gfd_vol_top_priv_t *priv = NULL;
-
- GF_ASSERT (args);
- priv = (gfd_vol_top_priv_t *)args;
- blk_size = priv->blk_size;
- blk_count = priv->blk_count;
+ GF_ASSERT (brick_path);
+ GF_ASSERT (throughput);
+ GF_ASSERT (time);
+ if (!(blk_size > 0) || ! (blk_count > 0))
+ goto out;
snprintf (export_path, sizeof (export_path), "%s/%s",
- priv->xlator_req.name, ".gf-tmp-stats-perf");
+ brick_path, ".gf-tmp-stats-perf");
fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
if (-1 == fd) {
ret = -1;
@@ -605,33 +538,27 @@ glusterfs_volume_top_read_perf (void *args)
}
gettimeofday (&end, NULL);
- time = (end.tv_sec - begin.tv_sec) * 1e6
- + (end.tv_usec - begin.tv_usec);
- throughput = total_blks / time;
+ *time = (end.tv_sec - begin.tv_sec) * 1e6
+ + (end.tv_usec - begin.tv_usec);
+ *throughput = total_blks / *time;
gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
- "bytes read %"PRId64, throughput, time, total_blks);
+ "bytes read %"PRId64, *throughput, *time, total_blks);
out:
- priv->throughput = throughput;
- priv->time = time;
-
if (fd >= 0)
close (fd);
if (input_fd >= 0)
close (input_fd);
if (output_fd >= 0)
close (output_fd);
- if (buf)
- GF_FREE (buf);
+ GF_FREE (buf);
unlink (export_path);
- (void)glusterfs_handle_translator_info_get_cont (priv);
-
- return NULL;
+ return ret;
}
int
-glusterfs_handle_translator_op (void *data)
+glusterfs_handle_translator_op (rpcsvc_request_t *req)
{
int32_t ret = -1;
gd1_mgmt_brick_op_req xlator_req = {0,};
@@ -646,20 +573,20 @@ glusterfs_handle_translator_op (void *data)
xlator_t *this = NULL;
int i = 0;
int count = 0;
- rpcsvc_request_t *req = data;
GF_ASSERT (req);
this = THIS;
GF_ASSERT (this);
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
goto out;
}
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
active = ctx->active;
any = active->first;
input = dict_new ();
@@ -712,8 +639,7 @@ out:
dict_unref (input);
if (output)
dict_unref (output);
- if (xlator_req.name)
- free (xlator_req.name); //malloced by xdr
+ free (xlator_req.name); //malloced by xdr
return 0;
}
@@ -737,7 +663,7 @@ glusterfs_handle_defrag (rpcsvc_request_t *req)
this = THIS;
GF_ASSERT (this);
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
GF_ASSERT (ctx);
active = ctx->active;
@@ -747,8 +673,9 @@ glusterfs_handle_defrag (rpcsvc_request_t *req)
}
any = active->first;
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
//failed to decode msg;
req->rpc_err = GARBAGE_ARGS;
goto out;
@@ -786,12 +713,10 @@ glusterfs_handle_defrag (rpcsvc_request_t *req)
out:
if (dict)
dict_unref (dict);
- if (xlator_req.input.input_val)
- free (xlator_req.input.input_val); // malloced by xdr
+ free (xlator_req.input.input_val); // malloced by xdr
if (output)
dict_unref (output);
- if (xlator_req.name)
- free (xlator_req.name); //malloced by xdr
+ free (xlator_req.name); //malloced by xdr
return ret;
@@ -818,8 +743,9 @@ glusterfs_handle_brick_status (rpcsvc_request_t *req)
this = THIS;
GF_ASSERT (this);
- if (!xdr_to_generic (req->msg[0], &brick_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ ret = xdr_to_generic (req->msg[0], &brick_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
goto out;
}
@@ -845,7 +771,7 @@ glusterfs_handle_brick_status (rpcsvc_request_t *req)
goto out;
}
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
GF_ASSERT (ctx);
active = ctx->active;
any = active->first;
@@ -903,39 +829,30 @@ glusterfs_handle_brick_status (rpcsvc_request_t *req)
rsp.op_errstr = "";
ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
+ &rsp.output.output_len);
if (ret) {
gf_log (this->name, GF_LOG_ERROR,
"Failed to serialize output dict to rsp");
goto out;
}
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
out:
if (dict)
dict_unref (dict);
if (output)
dict_unref (output);
- if (brick_req.input.input_val)
- free (brick_req.input.input_val);
- if (xname)
- GF_FREE (xname);
- if (msg)
- GF_FREE (msg);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
+ free (brick_req.input.input_val);
+ GF_FREE (xname);
+ GF_FREE (msg);
+ GF_FREE (rsp.output.output_val);
return ret;
}
-static int
-glusterfs_command_done (int ret, call_frame_t *sync_frame, void *data)
-{
- STACK_DESTROY (sync_frame->root);
- return 0;
-}
int
glusterfs_handle_node_status (rpcsvc_request_t *req)
@@ -958,8 +875,9 @@ glusterfs_handle_node_status (rpcsvc_request_t *req)
GF_ASSERT (req);
- if (!xdr_to_generic (req->msg[0], &node_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ ret = xdr_to_generic (req->msg[0], &node_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
goto out;
}
@@ -985,7 +903,7 @@ glusterfs_handle_node_status (rpcsvc_request_t *req)
goto out;
}
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
GF_ASSERT (ctx);
active = ctx->active;
any = active->first;
@@ -1087,29 +1005,25 @@ glusterfs_handle_node_status (rpcsvc_request_t *req)
rsp.op_errstr = "";
ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
+ &rsp.output.output_len);
if (ret) {
gf_log (THIS->name, GF_LOG_ERROR,
"Failed to serialize output dict to rsp");
goto out;
}
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
out:
if (dict)
dict_unref (dict);
- if (node_req.input.input_val)
- free (node_req.input.input_val);
- if (msg)
- GF_FREE (msg);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
- if (node_name)
- GF_FREE (node_name);
- if (subvol_name)
- GF_FREE (subvol_name);
+ free (node_req.input.input_val);
+ GF_FREE (msg);
+ GF_FREE (rsp.output.output_val);
+ GF_FREE (node_name);
+ GF_FREE (subvol_name);
gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
@@ -1132,8 +1046,9 @@ glusterfs_handle_nfs_profile (rpcsvc_request_t *req)
GF_ASSERT (req);
- if (!xdr_to_generic (req->msg[0], &nfs_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
+ ret = xdr_to_generic (req->msg[0], &nfs_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
req->rpc_err = GARBAGE_ARGS;
goto out;
}
@@ -1153,7 +1068,7 @@ glusterfs_handle_nfs_profile (rpcsvc_request_t *req)
goto out;
}
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
GF_ASSERT (ctx);
active = ctx->active;
@@ -1185,72 +1100,115 @@ glusterfs_handle_nfs_profile (rpcsvc_request_t *req)
rsp.op_errstr = "";
ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
+ &rsp.output.output_len);
if (ret) {
gf_log (THIS->name, GF_LOG_ERROR,
"Failed to serialize output dict to rsp");
goto out;
}
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
out:
- if (nfs_req.input.input_val)
- free (nfs_req.input.input_val);
+ free (nfs_req.input.input_val);
if (dict)
dict_unref (dict);
if (output)
dict_unref (output);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
+ GF_FREE (rsp.output.output_val);
gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
int
-glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
+glusterfs_handle_volume_barrier_op (rpcsvc_request_t *req)
{
- int ret = -1;
- xlator_t *this = THIS;
- call_frame_t *frame = NULL;
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {0,};
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ GF_ASSERT (req);
+ this = THIS;
GF_ASSERT (this);
- switch (req->procnum) {
- case GLUSTERD_BRICK_TERMINATE:
- ret = glusterfs_handle_terminate (req);
- break;
- case GLUSTERD_BRICK_XLATOR_INFO:
- ret = glusterfs_handle_translator_info_get (req);
- break;
- case GLUSTERD_BRICK_XLATOR_OP:
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
- ret = synctask_new (this->ctx->env,
- glusterfs_handle_translator_op,
- glusterfs_command_done, frame, req);
- break;
- case GLUSTERD_BRICK_STATUS:
- ret = glusterfs_handle_brick_status (req);
- break;
- case GLUSTERD_BRICK_XLATOR_DEFRAG:
- ret = glusterfs_handle_defrag (req);
- break;
- case GLUSTERD_NODE_PROFILE:
- ret = glusterfs_handle_nfs_profile (req);
- break;
- case GLUSTERD_NODE_STATUS:
- ret = glusterfs_handle_node_status (req);
- default:
- break;
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT (ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+ ret = xdr_to_generic (req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ //failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new ();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize (xlator_req.input.input_val,
+ xlator_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
}
+ xlator = xlator_search_by_name (any, xlator_req.name);
+ if (!xlator) {
+ snprintf (msg, sizeof (msg), "xlator %s is not loaded",
+ xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new ();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = xlator->notify (xlator, GF_EVENT_VOLUME_BARRIER_OP,
+ dict, output);
+
+ ret = glusterfs_translator_info_response_send (req, ret,
+ msg, output);
out:
+ if (dict)
+ dict_unref (dict);
+ free (xlator_req.input.input_val); // malloced by xdr
+ if (output)
+ dict_unref (output);
+ free (xlator_req.name); //malloced by xdr
+
+ return ret;
+
+}
+int
+glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
+{
+ int ret = -1;
+ /* for now, nothing */
return ret;
}
-rpcclnt_cb_actor_t gluster_cbk_actors[] = {
+rpcclnt_cb_actor_t mgmt_cbk_actors[] = {
[GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec },
[GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
mgmt_cbk_event},
@@ -1261,7 +1219,7 @@ struct rpcclnt_cb_program mgmt_cbk_prog = {
.progname = "GlusterFS Callback",
.prognum = GLUSTER_CBK_PROGRAM,
.progver = GLUSTER_CBK_VERSION,
- .actors = gluster_cbk_actors,
+ .actors = mgmt_cbk_actors,
.numactors = GF_CBK_MAXVALUE,
};
@@ -1298,14 +1256,15 @@ rpc_clnt_prog_t clnt_handshake_prog = {
};
rpcsvc_actor_t glusterfs_actors[] = {
- [GLUSTERD_BRICK_NULL] = { "NULL", GLUSTERD_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_TERMINATE] = { "TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_OP] = { "TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_DEFRAG] = { "TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", GLUSTERD_NODE_PROFILE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_NODE_STATUS] = {"NFS STATUS", GLUSTERD_NODE_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0}
+ [GLUSTERD_BRICK_NULL] = {"NULL", GLUSTERD_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_TERMINATE] = {"TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_terminate, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_XLATOR_INFO] = {"TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_translator_info_get, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_XLATOR_OP] = {"TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP, glusterfs_handle_translator_op, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_brick_status, NULL, 0, DRC_NA},
+ [GLUSTERD_BRICK_XLATOR_DEFRAG] = {"TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_defrag, NULL, 0, DRC_NA},
+ [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", GLUSTERD_NODE_PROFILE, glusterfs_handle_nfs_profile, NULL, 0, DRC_NA},
+ [GLUSTERD_NODE_STATUS] = {"NFS STATUS", GLUSTERD_NODE_STATUS, glusterfs_handle_node_status, NULL, 0, DRC_NA},
+ [GLUSTERD_VOLUME_BARRIER_OP] = {"VOLUME BARRIER OP", GLUSTERD_VOLUME_BARRIER_OP, glusterfs_handle_volume_barrier_op, NULL, 0, DRC_NA},
};
struct rpcsvc_program glusterfs_mop_prog = {
@@ -1314,6 +1273,7 @@ struct rpcsvc_program glusterfs_mop_prog = {
.progver = GD_BRICK_VERSION,
.actors = glusterfs_actors,
.numactors = GLUSTERD_BRICK_MAXVALUE,
+ .synctask = _gf_true,
};
int
@@ -1373,156 +1333,10 @@ out:
/* XXX: move these into @ctx */
-static char oldvolfile[131072];
+static char *oldvolfile = NULL;
static int oldvollen = 0;
-static int
-xlator_equal_rec (xlator_t *xl1, xlator_t *xl2)
-{
- xlator_list_t *trav1 = NULL;
- xlator_list_t *trav2 = NULL;
- int ret = 0;
-
- if (xl1 == NULL || xl2 == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return -1;
- }
-
- trav1 = xl1->children;
- trav2 = xl2->children;
-
- while (trav1 && trav2) {
- ret = xlator_equal_rec (trav1->xlator, trav2->xlator);
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "xlators children not equal");
- goto out;
- }
-
- trav1 = trav1->next;
- trav2 = trav2->next;
- }
-
- if (trav1 || trav2) {
- ret = -1;
- goto out;
- }
-
- if (strcmp (xl1->name, xl2->name)) {
- ret = -1;
- goto out;
- }
-out :
- return ret;
-}
-
-static gf_boolean_t
-is_graph_topology_equal (glusterfs_graph_t *graph1,
- glusterfs_graph_t *graph2)
-{
- xlator_t *trav1 = NULL;
- xlator_t *trav2 = NULL;
- gf_boolean_t ret = _gf_true;
-
- trav1 = graph1->first;
- trav2 = graph2->first;
-
- ret = xlator_equal_rec (trav1, trav2);
-
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "graphs are not equal");
- ret = _gf_false;
- goto out;
- }
-
- ret = _gf_true;
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "graphs are equal");
-
-out:
- return ret;
-}
-
-/* Function has 3types of return value 0, -ve , 1
- * return 0 =======> reconfiguration of options has succeeded
- * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
- * return -1(or -ve) =======> Some Internal Error occurred during the operation
- */
-static int
-glusterfs_volfile_reconfigure (FILE *newvolfile_fp)
-{
- glusterfs_graph_t *oldvolfile_graph = NULL;
- glusterfs_graph_t *newvolfile_graph = NULL;
- FILE *oldvolfile_fp = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- int ret = -1;
-
- oldvolfile_fp = tmpfile ();
- if (!oldvolfile_fp)
- goto out;
-
- if (!oldvollen) {
- ret = 1; // Has to call INIT for the whole graph
- goto out;
- }
- fwrite (oldvolfile, oldvollen, 1, oldvolfile_fp);
- fflush (oldvolfile_fp);
-
-
- oldvolfile_graph = glusterfs_graph_construct (oldvolfile_fp);
- if (!oldvolfile_graph) {
- goto out;
- }
-
- newvolfile_graph = glusterfs_graph_construct (newvolfile_fp);
- if (!newvolfile_graph) {
- goto out;
- }
-
- if (!is_graph_topology_equal (oldvolfile_graph,
- newvolfile_graph)) {
- ret = 1;
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Graph topology not equal(should call INIT)");
- goto out;
- }
-
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Only options have changed in the new "
- "graph");
-
- ctx = glusterfs_ctx_get ();
-
- if (!ctx) {
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "glusterfs_ctx_get() returned NULL");
- goto out;
- }
-
- oldvolfile_graph = ctx->active;
-
- if (!oldvolfile_graph) {
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "glusterfs_ctx->active is NULL");
- goto out;
- }
-
- /* */
- ret = glusterfs_graph_reconfigure (oldvolfile_graph,
- newvolfile_graph);
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Could not reconfigure new options in old graph");
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
int
mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
@@ -1534,6 +1348,7 @@ mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
int ret = 0;
ssize_t size = 0;
FILE *tmpfp = NULL;
+ char *volfilebuf = NULL;
frame = myframe;
ctx = frame->this->ctx;
@@ -1553,7 +1368,7 @@ mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
if (-1 == rsp.op_ret) {
gf_log (frame->this->name, GF_LOG_ERROR,
"failed to get the 'volume file' from server");
- ret = -1;
+ ret = rsp.op_errno;
goto out;
}
@@ -1574,6 +1389,10 @@ mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
fwrite (rsp.spec, size, 1, tmpfp);
fflush (tmpfp);
+ if (ferror (tmpfp)) {
+ ret = -1;
+ goto out;
+ }
/* Check if only options have changed. No need to reload the
* volfile if topology hasn't changed.
@@ -1583,10 +1402,19 @@ mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
* return -1(or -ve) =======> Some Internal Error occurred during the operation
*/
- ret = glusterfs_volfile_reconfigure (tmpfp);
+ ret = glusterfs_volfile_reconfigure (oldvollen, tmpfp, ctx, oldvolfile);
if (ret == 0) {
gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
"No need to re-load volfile, reconfigure done");
+ if (oldvolfile)
+ volfilebuf = GF_REALLOC (oldvolfile, size);
+ else
+ volfilebuf = GF_CALLOC (1, size, gf_common_mt_char);
+ if (!volfilebuf) {
+ ret = -1;
+ goto out;
+ }
+ oldvolfile = volfilebuf;
oldvollen = size;
memcpy (oldvolfile, rsp.spec, size);
goto out;
@@ -1598,21 +1426,41 @@ mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
}
ret = glusterfs_process_volfp (ctx, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
if (ret)
goto out;
+ if (oldvolfile)
+ volfilebuf = GF_REALLOC (oldvolfile, size);
+ else
+ volfilebuf = GF_CALLOC (1, size, gf_common_mt_char);
+
+ if (!volfilebuf) {
+ ret = -1;
+ goto out;
+ }
+ oldvolfile = volfilebuf;
oldvollen = size;
memcpy (oldvolfile, rsp.spec, size);
if (!is_mgmt_rpc_reconnect) {
glusterfs_mgmt_pmap_signin (ctx);
- is_mgmt_rpc_reconnect = 1;
+ is_mgmt_rpc_reconnect = _gf_true;
}
out:
STACK_DESTROY (frame->root);
- if (rsp.spec)
- free (rsp.spec);
+ free (rsp.spec);
+
+ emancipate (ctx, ret);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_log ("mgmt", GF_LOG_ERROR, "Server is operating at an "
+ "op-version which is not supported");
+ cleanup_and_exit (0);
+ }
if (ret && ctx && !ctx->active) {
/* Do it only for the first time */
@@ -1623,6 +1471,11 @@ out:
ctx->cmd_args.volfile_id);
cleanup_and_exit (0);
}
+
+
+ if (tmpfp)
+ fclose (tmpfp);
+
return 0;
}
@@ -1634,6 +1487,7 @@ glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
gf_getspec_req req = {0, };
int ret = 0;
call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
cmd_args = &ctx->cmd_args;
@@ -1642,9 +1496,40 @@ glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
req.key = cmd_args->volfile_id;
req.flags = 0;
+ dict = dict_new ();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32 (dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32 (dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize (dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "Failed to serialize dictionary");
+ goto out;
+ }
+
ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
(xdrproc_t)xdr_gf_getspec_req);
+out:
return ret;
}
@@ -1679,8 +1564,7 @@ mgmt_event_notify_cbk (struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
+ free (rsp.dict.dict_val); //malloced by xdr
return ret;
}
@@ -1713,14 +1597,13 @@ glusterfs_rebalance_event_notify_cbk (struct rpc_req *req, struct iovec *iov,
if (-1 == rsp.op_ret) {
gf_log (frame->this->name, GF_LOG_ERROR,
- "Recieved error (%s) from server",
+ "Received error (%s) from server",
strerror (rsp.op_errno));
ret = -1;
goto out;
}
out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
+ free (rsp.dict.dict_val); //malloced by xdr
return ret;
}
@@ -1734,7 +1617,7 @@ glusterfs_rebalance_event_notify (dict_t *dict)
cmd_args_t *cmd_args = NULL;
call_frame_t *frame = NULL;
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
cmd_args = &ctx->cmd_args;
frame = create_frame (THIS, ctx->pool);
@@ -1747,7 +1630,7 @@ glusterfs_rebalance_event_notify (dict_t *dict)
gf_log ("", GF_LOG_ERROR, "failed to set volname");
ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ &req.dict.dict_len);
}
ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
@@ -1755,8 +1638,7 @@ glusterfs_rebalance_event_notify (dict_t *dict)
glusterfs_rebalance_event_notify_cbk,
(xdrproc_t)xdr_gf_event_notify_req);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE (req.dict.dict_val);
STACK_DESTROY (frame->root);
return ret;
@@ -1766,48 +1648,83 @@ static int
mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
void *data)
{
- xlator_t *this = NULL;
- cmd_args_t *cmd_args = NULL;
- glusterfs_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
int ret = 0;
+ server_cmdline_t *server = NULL;
+ rpc_transport_t *rpc_trans = NULL;
+ int need_term = 0;
+ int emval = 0;
this = mydata;
+ rpc_trans = rpc->conn.trans;
ctx = this->ctx;
- cmd_args = &ctx->cmd_args;
+
switch (event) {
case RPC_CLNT_DISCONNECT:
if (!ctx->active) {
- cmd_args->max_connect_attempts--;
gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "failed to connect with remote-host: %s",
+ "failed to connect with remote-host: %s (%s)",
+ ctx->cmd_args.volfile_server,
strerror (errno));
+ server = ctx->cmd_args.curr_server;
+ if (server->list.next == &ctx->cmd_args.volfile_servers) {
+ need_term = 1;
+ emval = ENOTCONN;
+ gf_log("glusterfsd-mgmt", GF_LOG_INFO,
+ "Exhausted all volfile servers");
+ break;
+ }
+ server = list_entry (server->list.next, typeof(*server),
+ list);
+ ctx->cmd_args.curr_server = server;
+ ctx->cmd_args.volfile_server = server->volfile_server;
+
+ ret = dict_set_str (rpc_trans->options,
+ "remote-host",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to set remote-host: %s",
+ server->volfile_server);
+ need_term = 1;
+ emval = ENOTCONN;
+ break;
+ }
gf_log ("glusterfsd-mgmt", GF_LOG_INFO,
- "%d connect attempts left",
- cmd_args->max_connect_attempts);
- if (0 >= cmd_args->max_connect_attempts)
- cleanup_and_exit (1);
+ "connecting to next volfile server %s",
+ server->volfile_server);
}
break;
case RPC_CLNT_CONNECT:
rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
ret = glusterfs_volfile_fetch (ctx);
- if (ret && ctx && (ctx->active == NULL)) {
- /* Do it only for the first time */
- /* Exit the process.. there is some wrong options */
- gf_log ("mgmt", GF_LOG_ERROR,
- "failed to fetch volume file (key:%s)",
- ctx->cmd_args.volfile_id);
- cleanup_and_exit (0);
+ if (ret) {
+ emval = ret;
+ if (!ctx->active) {
+ need_term = 1;
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ break;
+
+ }
}
if (is_mgmt_rpc_reconnect)
glusterfs_mgmt_pmap_signin (ctx);
+
break;
default:
break;
}
+ if (need_term) {
+ emancipate (ctx, emval);
+ cleanup_and_exit (1);
+ }
+
return 0;
}
@@ -1920,7 +1837,7 @@ glusterfs_listener_stop (glusterfs_ctx_t *ctx)
if (ret) {
this = THIS;
- gf_log (this->name, GF_LOG_ERROR, "Failed to unlink linstener "
+ gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
"socket %s, error: %s", cmd_args->sock_file,
strerror (errno));
}
@@ -1928,6 +1845,24 @@ glusterfs_listener_stop (glusterfs_ctx_t *ctx)
}
+/* Management notification dispatcher.
+ * GF_EN_DEFRAG_STATUS hands data (cast to dict_t *) to
+ * glusterfs_rebalance_event_notify() and returns that call's result.
+ * Any other op is logged as "Invalid op" and 0 is returned.
+ * The variadic arguments are not read here.
+ */
int
+glusterfs_mgmt_notify (int32_t op, void *data, ...)
+{
+        if (op == GF_EN_DEFRAG_STATUS) {
+                return glusterfs_rebalance_event_notify ((dict_t *) data);
+        }
+
+        gf_log ("", GF_LOG_ERROR, "Invalid op");
+
+        return 0;
+}
+
+int
glusterfs_mgmt_init (glusterfs_ctx_t *ctx)
{
cmd_args_t *cmd_args = NULL;
@@ -1962,16 +1897,20 @@ glusterfs_mgmt_init (glusterfs_ctx_t *ctx)
ret = rpc_clnt_register_notify (rpc, mgmt_rpc_notify, THIS);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to register notify function");
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register notify function");
goto out;
}
- ret = rpcclnt_cbk_program_register (rpc, &mgmt_cbk_prog);
+ ret = rpcclnt_cbk_program_register (rpc, &mgmt_cbk_prog, THIS);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to register callback function");
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "failed to register callback function");
goto out;
}
+ ctx->notify = glusterfs_mgmt_notify;
+
/* This value should be set before doing the 'rpc_clnt_start()' as
the notify function uses this variable */
ctx->mgmt = rpc;
@@ -2050,7 +1989,7 @@ mgmt_pmap_signin_cbk (struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
cmd_args = &ctx->cmd_args;
if (!cmd_args->brick_port2) {
@@ -2119,7 +2058,7 @@ mgmt_pmap_signout_cbk (struct rpc_req *req, struct iovec *iov, int count,
goto out;
}
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signout_rsp);
if (ret < 0) {
gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 903eac72a..3cb8f0f51 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -77,6 +67,7 @@
#include <fnmatch.h>
#include "rpc-clnt.h"
#include "syncop.h"
+#include "client_t.h"
#include "daemon.h"
@@ -89,14 +80,15 @@
static char gf_doc[] = "";
static char argp_doc[] = "--volfile-server=SERVER [MOUNT-POINT]\n" \
"--volfile=VOLFILE [MOUNT-POINT]";
-const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__ \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2011 Gluster Inc. " \
- "<http://www.gluster.com>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "You may redistribute copies of GlusterFS under the terms of "\
- "the GNU General Public License.";
+const char *argp_program_version = ""
+ PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__
+ "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n"
+ "Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com/>\n"
+ "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+ "It is licensed to you under your choice of the GNU Lesser\n"
+ "General Public License, version 3 or any later version (LGPLv3\n"
+ "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+ "in all cases as published by the Free Software Foundation.";
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
static error_t parse_opts (int32_t key, char *arg, struct argp_state *_state);
@@ -106,10 +98,6 @@ static struct argp_option gf_options[] = {
{"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0,
"Server to get the volume file from. This option overrides "
"--volfile option"},
- {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS,
- "MAX-ATTEMPTS", 0, "Maximum number of connect attempts to server. "
- "This option should be provided with --volfile-server option"
- "[default: 1]"},
{"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0,
"File to use as VOLUME_FILE"},
{"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN,
@@ -117,7 +105,7 @@ static struct argp_option gf_options[] = {
{"log-level", ARGP_LOG_LEVEL_KEY, "LOGLEVEL", 0,
"Logging severity. Valid options are DEBUG, INFO, WARNING, ERROR, "
- "CRITICAL and NONE [default: INFO]"},
+ "CRITICAL, TRACE and NONE [default: INFO]"},
{"log-file", ARGP_LOG_FILE_KEY, "LOGFILE", 0,
"File to use for logging [default: "
DEFAULT_LOG_FILE_DIRECTORY "/" PACKAGE_NAME ".log" "]"},
@@ -152,6 +140,15 @@ static struct argp_option gf_options[] = {
"Mount the filesystem in 'read-only' mode"},
{"acl", ARGP_ACL_KEY, 0, 0,
"Mount the filesystem with POSIX ACL support"},
+ {"selinux", ARGP_SELINUX_KEY, 0, 0,
+ "Enable SELinux label (extened attributes) support on inodes"},
+#ifdef GF_LINUX_HOST_OS
+ {"aux-gfid-mount", ARGP_AUX_GFID_MOUNT_KEY, 0, 0,
+ "Enable access to filesystem through gfid directly"},
+#endif
+ {"enable-ino32", ARGP_INODE32_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use 32-bit inodes when mounting to workaround broken applications"
+ "that don't support 64-bit inodes"},
{"worm", ARGP_WORM_KEY, 0, 0,
"Mount the filesystem in 'worm' mode"},
{"mac-compat", ARGP_MAC_COMPAT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
@@ -166,6 +163,8 @@ static struct argp_option gf_options[] = {
"Brick name to be registered with Gluster portmapper" },
{"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
"Brick Port to be registered with Gluster portmapper" },
+ {"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Do not purge the cache on file open"},
{0, 0, 0, 0, "Fuse options:"},
{"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
@@ -174,9 +173,20 @@ static struct argp_option gf_options[] = {
"\"on\" for fds not opened with O_RDONLY]"},
{"entry-timeout", ARGP_ENTRY_TIMEOUT_KEY, "SECONDS", 0,
"Set entry timeout to SECONDS in fuse kernel module [default: 1]"},
+ {"negative-timeout", ARGP_NEGATIVE_TIMEOUT_KEY, "SECONDS", 0,
+ "Set negative timeout to SECONDS in fuse kernel module [default: 0]"},
{"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
"Set attribute timeout to SECONDS for inodes in fuse kernel module "
"[default: 1]"},
+ {"gid-timeout", ARGP_GID_TIMEOUT_KEY, "SECONDS", 0,
+ "Set auxilary group list timeout to SECONDS for fuse translator "
+ "[default: 0]"},
+ {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
+ "Set fuse module's background queue length to N "
+ "[default: 64]"},
+ {"congestion-threshold", ARGP_FUSE_CONGESTION_THRESHOLD_KEY, "N", 0,
+ "Set fuse module's congestion threshold to N "
+ "[default: 48]"},
{"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN,
"client will authenticate itself with process id PID to server"},
{"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN,
@@ -187,6 +197,11 @@ static struct argp_option gf_options[] = {
"Enable strict volume file checking"},
{"mem-accounting", ARGP_MEM_ACCOUNTING_KEY, 0, OPTION_HIDDEN,
"Enable internal memory accounting"},
+ {"fuse-mountopts", ARGP_FUSE_MOUNTOPTS_KEY, "OPTIONS", OPTION_HIDDEN,
+ "Extra mount options to pass to FUSE"},
+ {"use-readdirp", ARGP_FUSE_USE_READDIRP_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use readdirp mode in fuse kernel module"
+ " [default: \"off\"]"},
{0, 0, 0, 0, "Miscellaneous Options:"},
{0, }
};
@@ -200,50 +215,17 @@ int glusterfs_mgmt_init (glusterfs_ctx_t *ctx);
int glusterfs_listener_init (glusterfs_ctx_t *ctx);
int glusterfs_listener_stop (glusterfs_ctx_t *ctx);
-int
-create_fuse_mount (glusterfs_ctx_t *ctx)
+
+static int
+set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
{
int ret = 0;
cmd_args_t *cmd_args = NULL;
- xlator_t *master = NULL;
char *mount_point = NULL;
char cwd[PATH_MAX] = {0,};
cmd_args = &ctx->cmd_args;
- if (!cmd_args->mount_point) {
- gf_log ("", GF_LOG_TRACE,
- "mount point not found, not a client process");
- return 0;
- }
-
- if (ctx->process_mode != GF_CLIENT_PROCESS) {
- gf_log("glusterfsd", GF_LOG_ERROR,
- "Not a client process, not performing mount operation");
- return -1;
- }
-
- master = GF_CALLOC (1, sizeof (*master),
- gfd_mt_xlator_t);
- if (!master)
- goto err;
-
- master->name = gf_strdup ("fuse");
- if (!master->name)
- goto err;
-
- if (xlator_set_type (master, "mount/fuse") == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "MOUNT-POINT %s initialization failed",
- cmd_args->mount_point);
- goto err;
- }
-
- master->ctx = ctx;
- master->options = get_new_dict ();
- if (!master->options)
- goto err;
-
/* Check if mount-point is absolute path,
* if not convert to absolute path by concating with CWD
*/
@@ -265,7 +247,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
} else
mount_point = gf_strdup (cmd_args->mount_point);
- ret = dict_set_dynstr (master->options, ZR_MOUNTPOINT_OPT, mount_point);
+ ret = dict_set_dynstr (options, ZR_MOUNTPOINT_OPT, mount_point);
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
"failed to set mount-point to options dictionary");
@@ -273,7 +255,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
if (cmd_args->fuse_attribute_timeout >= 0) {
- ret = dict_set_double (master->options, ZR_ATTR_TIMEOUT_OPT,
+ ret = dict_set_double (options, ZR_ATTR_TIMEOUT_OPT,
cmd_args->fuse_attribute_timeout);
if (ret < 0) {
@@ -285,7 +267,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
if (cmd_args->fuse_entry_timeout >= 0) {
- ret = dict_set_double (master->options, ZR_ENTRY_TIMEOUT_OPT,
+ ret = dict_set_double (options, ZR_ENTRY_TIMEOUT_OPT,
cmd_args->fuse_entry_timeout);
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
@@ -295,8 +277,19 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
}
+ if (cmd_args->fuse_negative_timeout >= 0) {
+ ret = dict_set_double (options, ZR_NEGATIVE_TIMEOUT_OPT,
+ cmd_args->fuse_negative_timeout);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_NEGATIVE_TIMEOUT_OPT);
+ goto err;
+ }
+ }
+
if (cmd_args->client_pid_set) {
- ret = dict_set_int32 (master->options, "client-pid",
+ ret = dict_set_int32 (options, "client-pid",
cmd_args->client_pid);
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
@@ -307,7 +300,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
if (cmd_args->uid_map_root) {
- ret = dict_set_int32 (master->options, "uid-map-root",
+ ret = dict_set_int32 (options, "uid-map-root",
cmd_args->uid_map_root);
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
@@ -318,7 +311,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
if (cmd_args->volfile_check) {
- ret = dict_set_int32 (master->options, ZR_STRICT_VOLFILE_CHECK,
+ ret = dict_set_int32 (options, ZR_STRICT_VOLFILE_CHECK,
cmd_args->volfile_check);
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
@@ -329,7 +322,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
if (cmd_args->dump_fuse) {
- ret = dict_set_static_ptr (master->options, ZR_DUMP_FUSE,
+ ret = dict_set_static_ptr (options, ZR_DUMP_FUSE,
cmd_args->dump_fuse);
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
@@ -340,7 +333,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
if (cmd_args->acl) {
- ret = dict_set_static_ptr (master->options, "acl", "on");
+ ret = dict_set_static_ptr (options, "acl", "on");
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
"failed to set dict value for key acl");
@@ -348,8 +341,37 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
}
+ if (cmd_args->selinux) {
+ ret = dict_set_static_ptr (options, "selinux", "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key selinux");
+ goto err;
+ }
+ }
+
+ if (cmd_args->aux_gfid_mount) {
+ ret = dict_set_static_ptr (options, "virtual-gfid-access",
+ "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key "
+ "aux-gfid-mount");
+ goto err;
+ }
+ }
+
+ if (cmd_args->enable_ino32) {
+ ret = dict_set_static_ptr (options, "enable-ino32", "on");
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key enable-ino32");
+ goto err;
+ }
+ }
+
if (cmd_args->read_only) {
- ret = dict_set_static_ptr (master->options, "read-only", "on");
+ ret = dict_set_static_ptr (options, "read-only", "on");
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
"failed to set dict value for key read-only");
@@ -357,9 +379,66 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
}
+ switch (cmd_args->fopen_keep_cache) {
+ case GF_OPTION_ENABLE:
+ ret = dict_set_static_ptr(options, "fopen-keep-cache",
+ "on");
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key "
+ "fopen-keep-cache");
+ goto err;
+ }
+ break;
+ case GF_OPTION_DISABLE:
+ ret = dict_set_static_ptr(options, "fopen-keep-cache",
+ "off");
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key "
+ "fopen-keep-cache");
+ goto err;
+ }
+ break;
+ case GF_OPTION_DEFERRED: /* default */
+ default:
+ gf_log ("glusterfsd", GF_LOG_DEBUG,
+ "fopen-keep-cache mode %d",
+ cmd_args->fopen_keep_cache);
+ break;
+ }
+
+ if (cmd_args->gid_timeout) {
+ ret = dict_set_int32(options, "gid-timeout",
+ cmd_args->gid_timeout);
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
+ "value for key gid-timeout");
+ goto err;
+ }
+ }
+ if (cmd_args->background_qlen) {
+ ret = dict_set_int32 (options, "background-qlen",
+ cmd_args->background_qlen);
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
+ "value for key background-qlen");
+ goto err;
+ }
+ }
+ if (cmd_args->congestion_threshold) {
+ ret = dict_set_int32 (options, "congestion-threshold",
+ cmd_args->congestion_threshold);
+ if (ret < 0) {
+ gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
+ "value for key congestion-threshold");
+ goto err;
+ }
+ }
+
switch (cmd_args->fuse_direct_io_mode) {
case GF_OPTION_DISABLE: /* disable */
- ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT,
+ ret = dict_set_static_ptr (options, ZR_DIRECT_IO_OPT,
"disable");
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
@@ -369,7 +448,7 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
break;
case GF_OPTION_ENABLE: /* enable */
- ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT,
+ ret = dict_set_static_ptr (options, ZR_DIRECT_IO_OPT,
"enable");
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
@@ -386,8 +465,8 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
if (!cmd_args->no_daemon_mode) {
- ret = dict_set_static_ptr (master->options, "sync-to-mount",
- "enable");
+ ret = dict_set_static_ptr (options, "sync-to-mount",
+ "enable");
if (ret < 0) {
gf_log ("glusterfsd", GF_LOG_ERROR,
"failed to set dict value for key sync-mtab");
@@ -395,6 +474,77 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
}
}
+ if (cmd_args->use_readdirp) {
+ ret = dict_set_str (options, "use-readdirp",
+ cmd_args->use_readdirp);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR, "failed to set dict"
+ " value for key use-readdirp");
+ goto err;
+ }
+ }
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+create_fuse_mount (glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ xlator_t *master = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->mount_point) {
+ gf_log ("", GF_LOG_TRACE,
+ "mount point not found, not a client process");
+ return 0;
+ }
+
+ if (ctx->process_mode != GF_CLIENT_PROCESS) {
+ gf_log("glusterfsd", GF_LOG_ERROR,
+ "Not a client process, not performing mount operation");
+ return -1;
+ }
+
+ master = GF_CALLOC (1, sizeof (*master),
+ gfd_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup ("fuse");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type (master, "mount/fuse") == -1) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "MOUNT-POINT %s initialization failed",
+ cmd_args->mount_point);
+ goto err;
+ }
+
+ master->ctx = ctx;
+ master->options = get_new_dict ();
+ if (!master->options)
+ goto err;
+
+ ret = set_fuse_mount_options (ctx, master->options);
+ if (ret)
+ goto err;
+
+ if (cmd_args->fuse_mountopts) {
+ ret = dict_set_static_ptr (master->options, ZR_FUSE_MOUNTOPTS,
+ cmd_args->fuse_mountopts);
+ if (ret < 0) {
+ gf_log ("glusterfsd", GF_LOG_ERROR,
+ "failed to set dict value for key %s",
+ ZR_FUSE_MOUNTOPTS);
+ goto err;
+ }
+ }
+
ret = xlator_init (master);
if (ret) {
gf_log ("", GF_LOG_DEBUG, "failed to initialize fuse translator");
@@ -410,7 +560,7 @@ err:
xlator_destroy (master);
}
- return -1;
+ return 1;
}
@@ -446,7 +596,58 @@ get_volfp (glusterfs_ctx_t *ctx)
}
+/* Record one --volfile-server argument.
+ *
+ * Allocates a server_cmdline_t holding a private copy of arg and appends it
+ * to cmd_args->volfile_servers of the global glusterfsd_ctx.  The first
+ * server recorded also becomes cmd_args->volfile_server / curr_server.
+ *
+ * Returns 0 on success, -1 when glusterfsd_ctx is unset or an allocation
+ * fails; on failure nothing is linked into cmd_args and the partially built
+ * node is freed.
+ */
static int
-gf_remember_xlator_option (struct list_head *options, char *arg)
+gf_remember_backup_volfile_server (char *arg)
+{
+        glusterfs_ctx_t  *ctx      = NULL;
+        cmd_args_t       *cmd_args = NULL;
+        int               ret      = -1;
+        server_cmdline_t *server   = NULL;
+
+        ctx = glusterfsd_ctx;
+        if (!ctx)
+                goto out;
+        /* address of an embedded member: cannot be NULL once ctx is valid */
+        cmd_args = &ctx->cmd_args;
+
+        server = GF_CALLOC (1, sizeof (server_cmdline_t),
+                            gfd_mt_server_cmdline_t);
+        if (!server)
+                goto out;
+
+        INIT_LIST_HEAD(&server->list);
+
+        server->volfile_server = gf_strdup(arg);
+        if (!server->volfile_server) {
+                gf_log ("", GF_LOG_WARNING,
+                        "failed to copy volfile server name %s", arg);
+                goto out;
+        }
+
+        /* Publish the node only after the copy succeeded; doing it earlier
+         * would leave cmd_args->curr_server pointing at memory that the
+         * error path below frees.
+         */
+        if (!cmd_args->volfile_server) {
+                cmd_args->volfile_server = server->volfile_server;
+                cmd_args->curr_server = server;
+        }
+
+        list_add_tail (&server->list, &cmd_args->volfile_servers);
+
+        ret = 0;
+out:
+        if (ret == -1) {
+                if (server) {
+                        GF_FREE (server->volfile_server);
+                        GF_FREE (server);
+                }
+        }
+
+        return ret;
+
+}
+
+static int
+gf_remember_xlator_option (char *arg)
{
glusterfs_ctx_t *ctx = NULL;
cmd_args_t *cmd_args = NULL;
@@ -455,7 +656,7 @@ gf_remember_xlator_option (struct list_head *options, char *arg)
char *dot = NULL;
char *equals = NULL;
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
cmd_args = &ctx->cmd_args;
option = GF_CALLOC (1, sizeof (xlator_cmdline_option_t),
@@ -507,12 +708,9 @@ gf_remember_xlator_option (struct list_head *options, char *arg)
out:
if (ret == -1) {
if (option) {
- if (option->volume)
- GF_FREE (option->volume);
- if (option->key)
- GF_FREE (option->key);
- if (option->value)
- GF_FREE (option->value);
+ GF_FREE (option->volume);
+ GF_FREE (option->key);
+ GF_FREE (option->value);
GF_FREE (option);
}
@@ -526,7 +724,6 @@ out:
static error_t
parse_opts (int key, char *arg, struct argp_state *state)
{
- glusterfs_ctx_t *ctx = NULL;
cmd_args_t *cmd_args = NULL;
uint32_t n = 0;
double d = 0.0;
@@ -541,19 +738,8 @@ parse_opts (int key, char *arg, struct argp_state *state)
switch (key) {
case ARGP_VOLFILE_SERVER_KEY:
- cmd_args->volfile_server = gf_strdup (arg);
- break;
-
- case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS:
- n = 0;
+ gf_remember_backup_volfile_server (arg);
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->max_connect_attempts = n;
- break;
- }
-
- argp_failure (state, -1, 0,
- "Invalid limit on connect attempts %s", arg);
break;
case ARGP_READ_ONLY_KEY:
@@ -562,6 +748,20 @@ parse_opts (int key, char *arg, struct argp_state *state)
case ARGP_ACL_KEY:
cmd_args->acl = 1;
+ gf_remember_xlator_option ("*-md-cache.cache-posix-acl=true");
+ break;
+
+ case ARGP_SELINUX_KEY:
+ cmd_args->selinux = 1;
+ gf_remember_xlator_option ("*-md-cache.cache-selinux=true");
+ break;
+
+ case ARGP_AUX_GFID_MOUNT_KEY:
+ cmd_args->aux_gfid_mount = 1;
+ break;
+
+ case ARGP_INODE32_KEY:
+ cmd_args->enable_ino32 = 1;
break;
case ARGP_WORM_KEY:
@@ -583,8 +783,7 @@ parse_opts (int key, char *arg, struct argp_state *state)
break;
case ARGP_VOLUME_FILE_KEY:
- if (cmd_args->volfile)
- GF_FREE (cmd_args->volfile);
+ GF_FREE (cmd_args->volfile);
if (arg[0] != '/') {
pwd = getcwd (NULL, PATH_MAX);
@@ -706,6 +905,18 @@ parse_opts (int key, char *arg, struct argp_state *state)
argp_failure (state, -1, 0, "unknown entry timeout %s", arg);
break;
+ case ARGP_NEGATIVE_TIMEOUT_KEY:
+ d = 0.0;
+
+ gf_string2double (arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_negative_timeout = d;
+ break;
+ }
+
+ argp_failure (state, -1, 0, "unknown negative timeout %s", arg);
+ break;
+
case ARGP_ATTRIBUTE_TIMEOUT_KEY:
d = 0.0;
@@ -747,8 +958,9 @@ parse_opts (int key, char *arg, struct argp_state *state)
break;
case ARGP_XLATOR_OPTION_KEY:
- if (gf_remember_xlator_option (&cmd_args->xlator_options, arg))
- argp_failure (state, -1, 0, "invalid xlator option %s", arg);
+ if (gf_remember_xlator_option (arg))
+ argp_failure (state, -1, 0, "invalid xlator option %s",
+ arg);
break;
@@ -792,10 +1004,68 @@ parse_opts (int key, char *arg, struct argp_state *state)
case ARGP_MEM_ACCOUNTING_KEY:
/* TODO: it should have got handled much earlier */
- ctx = glusterfs_ctx_get ();
- ctx->mem_accounting = 1;
+ //gf_mem_acct_enable_set (THIS->ctx);
break;
- }
+
+ case ARGP_FOPEN_KEEP_CACHE_KEY:
+ if (!arg)
+ arg = "on";
+
+ if (gf_string2boolean (arg, &b) == 0) {
+ cmd_args->fopen_keep_cache = b;
+
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown cache setting \"%s\"", arg);
+
+ break;
+
+ case ARGP_GID_TIMEOUT_KEY:
+ if (!gf_string2int(arg, &cmd_args->gid_timeout))
+ break;
+
+ argp_failure(state, -1, 0, "unknown group list timeout %s", arg);
+ break;
+ case ARGP_FUSE_BACKGROUND_QLEN_KEY:
+ if (!gf_string2int (arg, &cmd_args->background_qlen))
+ break;
+
+ argp_failure (state, -1, 0,
+ "unknown background qlen option %s", arg);
+ break;
+ case ARGP_FUSE_CONGESTION_THRESHOLD_KEY:
+ if (!gf_string2int (arg, &cmd_args->congestion_threshold))
+ break;
+
+ argp_failure (state, -1, 0,
+ "unknown congestion threshold option %s", arg);
+ break;
+
+ case ARGP_FUSE_MOUNTOPTS_KEY:
+ cmd_args->fuse_mountopts = gf_strdup (arg);
+ break;
+
+ case ARGP_FUSE_USE_READDIRP_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean (arg, &b) == 0) {
+ if (b) {
+ cmd_args->use_readdirp = "yes";
+ } else {
+ cmd_args->use_readdirp = "no";
+ }
+
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown use-readdirp setting \"%s\"", arg);
+ break;
+
+ }
return 0;
}
@@ -807,7 +1077,7 @@ cleanup_and_exit (int signum)
glusterfs_ctx_t *ctx = NULL;
xlator_t *trav = NULL;
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
if (!ctx)
return;
@@ -820,9 +1090,17 @@ cleanup_and_exit (int signum)
ctx->cleanup_started = 1;
glusterfs_mgmt_pmap_signout (ctx);
- if (ctx->listener) {
- (void) glusterfs_listener_stop (ctx);
- }
+
+ /* The code below is racy: it frees the rpcsvc object, but another
+ * thread (the epoll thread) may, upon a poll error on the socket,
+ * clean up the transports and access that rpcsvc object again after
+ * it has already been freed by the function below.
+ * Since the process is about to be killed, don't execute the function
+ * below.
+ */
+ /* if (ctx->listener) { */
+ /* (void) glusterfs_listener_stop (ctx); */
+ /* } */
/* Call fini() of FUSE xlator first:
* so there are no more requests coming and
@@ -866,7 +1144,7 @@ reincarnate (int signum)
glusterfs_ctx_t *ctx = NULL;
cmd_args_t *cmd_args = NULL;
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfsd_ctx;
cmd_args = &ctx->cmd_args;
if (cmd_args->volfile_server) {
@@ -888,37 +1166,17 @@ reincarnate (int signum)
return;
}
-
-static char *
-generate_uuid ()
+void
+emancipate (glusterfs_ctx_t *ctx, int ret)
{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
+ /* Break free from the parent: if the write end of the daemon pipe is
+  * still open (not -1), send the int `ret` through it, then close it and
+  * mark it -1 so that any later call does nothing.
+  * NOTE(review): the result of write() is not checked, so a short or
+  * failed write goes unnoticed here. */
+ if (ctx->daemon_pipe[1] != -1) {
+ write (ctx->daemon_pipe[1], (void *) &ret, sizeof (ret));
+ close (ctx->daemon_pipe[1]);
+ ctx->daemon_pipe[1] = -1;
}
-
- if (gethostname (hostname, 256) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%" GF_PRI_SUSECONDS,
- hostname, getpid(), now_str, tv.tv_usec);
-
- return gf_strdup (tmp_str);
}
-
static uint8_t
gf_get_process_mode (char *exec_name)
{
@@ -942,82 +1200,20 @@ gf_get_process_mode (char *exec_name)
}
-
-static int
-set_log_file_path (cmd_args_t *cmd_args)
-{
- int i = 0;
- int j = 0;
- int ret = 0;
- int port = 0;
- char *tmp_ptr = NULL;
- char tmp_str[1024] = {0,};
-
- if (cmd_args->mount_point) {
- j = 0;
- i = 0;
- if (cmd_args->mount_point[0] == '/')
- i = 1;
- for (; i < strlen (cmd_args->mount_point); i++,j++) {
- tmp_str[j] = cmd_args->mount_point[i];
- if (cmd_args->mount_point[i] == '/')
- tmp_str[j] = '-';
- }
-
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- goto done;
- }
-
- if (cmd_args->volfile) {
- j = 0;
- i = 0;
- if (cmd_args->volfile[0] == '/')
- i = 1;
- for (; i < strlen (cmd_args->volfile); i++,j++) {
- tmp_str[j] = cmd_args->volfile[i];
- if (cmd_args->volfile[i] == '/')
- tmp_str[j] = '-';
- }
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- goto done;
- }
-
- if (cmd_args->volfile_server) {
- port = 1;
- tmp_ptr = "default";
-
- if (cmd_args->volfile_server_port)
- port = cmd_args->volfile_server_port;
- if (cmd_args->volfile_id)
- tmp_ptr = cmd_args->volfile_id;
-
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s-%s-%d.log",
- cmd_args->volfile_server, tmp_ptr, port);
- }
-done:
- return ret;
-}
-
-
static int
glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- struct rlimit lim = {0, };
- call_pool_t *pool = NULL;
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {0, };
+ int ret = -1;
xlator_mem_acct_init (THIS, gfd_mt_end);
- ctx->process_uuid = generate_uuid ();
+ ctx->process_uuid = generate_glusterfs_ctx_id ();
if (!ctx->process_uuid) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs uuid generation failed");
- return -1;
+ goto out;
}
ctx->page_size = 128 * GF_UNIT_KB;
@@ -1026,65 +1222,67 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
if (!ctx->iobuf_pool) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs iobuf pool creation failed");
- return -1;
+ goto out;
}
ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
if (!ctx->event_pool) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs event pool creation failed");
- return -1;
+ goto out;
}
- pool = GF_CALLOC (1, sizeof (call_pool_t),
- gfd_mt_call_pool_t);
- if (!pool) {
+ ctx->pool = GF_CALLOC (1, sizeof (call_pool_t), gfd_mt_call_pool_t);
+ if (!ctx->pool) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs call pool creation failed");
- return -1;
+ goto out;
}
+ INIT_LIST_HEAD (&ctx->pool->all_frames);
+ LOCK_INIT (&ctx->pool->lock);
+
/* frame_mem_pool size 112 * 4k */
- pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
- if (!pool->frame_mem_pool) {
+ ctx->pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
+ if (!ctx->pool->frame_mem_pool) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs frame pool creation failed");
- return -1;
+ goto out;
}
/* stack_mem_pool size 256 * 1024 */
- pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
- if (!pool->stack_mem_pool) {
+ ctx->pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
+ if (!ctx->pool->stack_mem_pool) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs stack pool creation failed");
- return -1;
+ goto out;
}
ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
if (!ctx->stub_mem_pool) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs stub pool creation failed");
- return -1;
+ goto out;
}
ctx->dict_pool = mem_pool_new (dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
if (!ctx->dict_pool)
- return -1;
+ goto out;
ctx->dict_pair_pool = mem_pool_new (data_pair_t,
GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
if (!ctx->dict_pair_pool)
- return -1;
+ goto out;
ctx->dict_data_pool = mem_pool_new (data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
if (!ctx->dict_data_pool)
- return -1;
-
- INIT_LIST_HEAD (&pool->all_frames);
- LOCK_INIT (&pool->lock);
- ctx->pool = pool;
+ goto out;
pthread_mutex_init (&(ctx->lock), NULL);
+ ctx->clienttable = gf_clienttable_alloc();
+ if (!ctx->clienttable)
+ goto out;
+
cmd_args = &ctx->cmd_args;
/* parsing command line arguments */
@@ -1101,33 +1299,68 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
#endif
cmd_args->fuse_attribute_timeout = -1;
cmd_args->fuse_entry_timeout = -1;
+ cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED;
INIT_LIST_HEAD (&cmd_args->xlator_options);
+ INIT_LIST_HEAD (&cmd_args->volfile_servers);
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
setrlimit (RLIMIT_CORE, &lim);
- return 0;
+ ret = 0;
+out:
+
+ if (ret && ctx) {
+ if (ctx->pool) {
+ mem_pool_destroy (ctx->pool->frame_mem_pool);
+ mem_pool_destroy (ctx->pool->stack_mem_pool);
+ }
+ GF_FREE (ctx->pool);
+ mem_pool_destroy (ctx->stub_mem_pool);
+ mem_pool_destroy (ctx->dict_pool);
+ mem_pool_destroy (ctx->dict_data_pool);
+ mem_pool_destroy (ctx->dict_pair_pool);
+ }
+
+ return ret;
}
static int
-logging_init (glusterfs_ctx_t *ctx)
+logging_init (glusterfs_ctx_t *ctx, const char *progpath)
{
cmd_args_t *cmd_args = NULL;
int ret = 0;
+ char ident[1024] = {0,};
+ char *progname = NULL;
+ char *ptr = NULL;
cmd_args = &ctx->cmd_args;
if (cmd_args->log_file == NULL) {
- ret = set_log_file_path (cmd_args);
+ ret = gf_set_log_file_path (cmd_args);
if (ret == -1) {
fprintf (stderr, "ERROR: failed to set the log file path\n");
return -1;
}
}
- if (gf_log_init (cmd_args->log_file) == -1) {
+#ifdef GF_USE_SYSLOG
+ progname = gf_strdup (progpath);
+ snprintf (ident, 1024, "%s_%s", basename(progname),
+ basename(cmd_args->log_file));
+ GF_FREE (progname);
+ /* remove .log suffix */
+ if (NULL != (ptr = strrchr(ident, '.'))) {
+ if (strcmp(ptr, ".log") == 0) {
+ /* note: ptr points to location in ident only */
+ ptr[0] = '\0';
+ }
+ }
+ ptr = ident;
+#endif
+
+ if (gf_log_init (ctx, cmd_args->log_file, ptr) == -1) {
fprintf (stderr, "ERROR: failed to open logfile %s\n",
cmd_args->log_file);
return -1;
@@ -1139,12 +1372,12 @@ logging_init (glusterfs_ctx_t *ctx)
}
void
-gf_check_and_set_mem_acct (int argc, char *argv[], glusterfs_ctx_t *ctx)
+gf_check_and_set_mem_acct (glusterfs_ctx_t *ctx, int argc, char *argv[])
{
int i = 0;
for (i = 0; i < argc; i++) {
if (strcmp (argv[i], "--mem-accounting") == 0) {
- ctx->mem_accounting = 1;
+ gf_mem_acct_enable_set (ctx);
break;
}
}
@@ -1153,16 +1386,14 @@ gf_check_and_set_mem_acct (int argc, char *argv[], glusterfs_ctx_t *ctx)
int
parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx)
{
- int process_mode = 0;
- int ret = 0;
- struct stat stbuf = {0, };
- struct tm *tm = NULL;
- time_t utime;
- char timestr[256];
- char tmp_logfile[1024] = { 0 };
- char *tmp_logfile_dyn = NULL;
- char *tmp_logfilebase = NULL;
- cmd_args_t *cmd_args = NULL;
+ int process_mode = 0;
+ int ret = 0;
+ struct stat stbuf = {0, };
+ char timestr[32];
+ char tmp_logfile[1024] = { 0 };
+ char *tmp_logfile_dyn = NULL;
+ char *tmp_logfilebase = NULL;
+ cmd_args_t *cmd_args = NULL;
cmd_args = &ctx->cmd_args;
@@ -1219,8 +1450,8 @@ parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx)
(S_ISREG (stbuf.st_mode) || S_ISLNK (stbuf.st_mode))) ||
(ret == -1)) {
/* Have separate logfile per run */
- tm = localtime (&utime);
- strftime (timestr, 256, "%Y%m%d.%H%M%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL),
+ gf_timefmt_FT);
sprintf (tmp_logfile, "%s.%s.%d",
cmd_args->log_file, timestr, getpid ());
@@ -1258,7 +1489,7 @@ int
glusterfs_pidfile_setup (glusterfs_ctx_t *ctx)
{
cmd_args_t *cmd_args = NULL;
- int ret = 0;
+ int ret = -1;
FILE *pidfp = NULL;
cmd_args = &ctx->cmd_args;
@@ -1271,7 +1502,7 @@ glusterfs_pidfile_setup (glusterfs_ctx_t *ctx)
gf_log ("glusterfsd", GF_LOG_ERROR,
"pidfile %s error (%s)",
cmd_args->pid_file, strerror (errno));
- return -1;
+ goto out;
}
ret = lockf (fileno (pidfp), F_TLOCK, 0);
@@ -1279,7 +1510,7 @@ glusterfs_pidfile_setup (glusterfs_ctx_t *ctx)
gf_log ("glusterfsd", GF_LOG_ERROR,
"pidfile %s lock error (%s)",
cmd_args->pid_file, strerror (errno));
- return ret;
+ goto out;
}
gf_log ("glusterfsd", GF_LOG_TRACE,
@@ -1291,12 +1522,17 @@ glusterfs_pidfile_setup (glusterfs_ctx_t *ctx)
gf_log ("glusterfsd", GF_LOG_ERROR,
"pidfile %s unlock error (%s)",
cmd_args->pid_file, strerror (errno));
- return ret;
+ goto out;
}
ctx->pidfp = pidfp;
- return 0;
+ ret = 0;
+out:
+ if (ret && pidfp)
+ fclose (pidfp);
+
+ return ret;
}
@@ -1409,10 +1645,10 @@ glusterfs_sigwaiter (void *arg)
reincarnate (sig);
break;
case SIGUSR1:
- gf_proc_dump_info (sig);
+ gf_proc_dump_info (sig, glusterfsd_ctx);
break;
case SIGUSR2:
- gf_latency_toggle (sig);
+ gf_latency_toggle (sig, glusterfsd_ctx);
break;
default:
@@ -1424,6 +1660,13 @@ glusterfs_sigwaiter (void *arg)
}
+void
+glusterfsd_print_trace (int signum)
+{
+ gf_print_trace (signum, glusterfsd_ctx);
+}
+
+
int
glusterfs_signals_setup (glusterfs_ctx_t *ctx)
{
@@ -1433,12 +1676,12 @@ glusterfs_signals_setup (glusterfs_ctx_t *ctx)
sigemptyset (&set);
/* common setting for all threads */
- signal (SIGSEGV, gf_print_trace);
- signal (SIGABRT, gf_print_trace);
- signal (SIGILL, gf_print_trace);
- signal (SIGTRAP, gf_print_trace);
- signal (SIGFPE, gf_print_trace);
- signal (SIGBUS, gf_print_trace);
+ signal (SIGSEGV, glusterfsd_print_trace);
+ signal (SIGABRT, glusterfsd_print_trace);
+ signal (SIGILL, glusterfsd_print_trace);
+ signal (SIGTRAP, glusterfsd_print_trace);
+ signal (SIGFPE, glusterfsd_print_trace);
+ signal (SIGBUS, glusterfsd_print_trace);
signal (SIGINT, cleanup_and_exit);
signal (SIGPIPE, SIG_IGN);
@@ -1450,7 +1693,7 @@ glusterfs_signals_setup (glusterfs_ctx_t *ctx)
ret = pthread_sigmask (SIG_BLOCK, &set, NULL);
if (ret) {
- gf_log ("", GF_LOG_WARNING,
+ gf_log ("glusterfsd", GF_LOG_WARNING,
"failed to execute pthread_signmask %s",
strerror (errno));
return ret;
@@ -1464,7 +1707,7 @@ glusterfs_signals_setup (glusterfs_ctx_t *ctx)
fallback to signals getting handled by other threads.
setup the signal handlers
*/
- gf_log ("", GF_LOG_WARNING,
+ gf_log ("glusterfsd", GF_LOG_WARNING,
"failed to create pthread %s",
strerror (errno));
return ret;
@@ -1480,6 +1723,7 @@ daemonize (glusterfs_ctx_t *ctx)
int ret = -1;
cmd_args_t *cmd_args = NULL;
int cstatus = 0;
+ int err = 0;
cmd_args = &ctx->cmd_args;
@@ -1493,15 +1737,36 @@ daemonize (glusterfs_ctx_t *ctx)
if (cmd_args->debug_mode)
goto postfork;
+ ret = pipe (ctx->daemon_pipe);
+ if (ret) {
+ /* If pipe() fails, keep daemon_pipe[] = {-1, -1}; the
+ parent will then simply not wait for the child's status.
+ */
+ ctx->daemon_pipe[0] = -1;
+ ctx->daemon_pipe[1] = -1;
+ }
+
ret = os_daemon_return (0, 0);
switch (ret) {
case -1:
+ if (ctx->daemon_pipe[0] != -1) {
+ close (ctx->daemon_pipe[0]);
+ close (ctx->daemon_pipe[1]);
+ }
+
gf_log ("daemonize", GF_LOG_ERROR,
"Daemonization failed: %s", strerror(errno));
goto out;
case 0:
+ /* child */
+ /* close read */
+ close (ctx->daemon_pipe[0]);
break;
default:
+ /* parent */
+ /* close write */
+ close (ctx->daemon_pipe[1]);
+
if (ctx->mnt_pid > 0) {
ret = waitpid (ctx->mnt_pid, &cstatus, 0);
if (!(ret == ctx->mnt_pid && cstatus == 0)) {
@@ -1510,7 +1775,10 @@ daemonize (glusterfs_ctx_t *ctx)
exit (1);
}
}
- _exit (0);
+
+ err = 1;
+ read (ctx->daemon_pipe[0], (void *)&err, sizeof (err));
+ _exit (err);
}
postfork:
@@ -1559,7 +1827,7 @@ glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp)
goto out;
}
- gf_log_volume_file (fp);
+ gf_log_dump_graph (fp, graph);
ret = 0;
out:
@@ -1592,7 +1860,8 @@ glusterfs_volumes_init (glusterfs_ctx_t *ctx)
if (cmd_args->volfile_server) {
ret = glusterfs_mgmt_init (ctx);
- goto out;
+ /* return, do not emancipate() yet */
+ return ret;
}
fp = get_volfp (ctx);
@@ -1609,30 +1878,42 @@ glusterfs_volumes_init (glusterfs_ctx_t *ctx)
goto out;
out:
+ emancipate (ctx, ret);
return ret;
}
+/* This is the only legal global pointer */
+glusterfs_ctx_t *glusterfsd_ctx;
+
int
main (int argc, char *argv[])
{
glusterfs_ctx_t *ctx = NULL;
int ret = -1;
+ char cmdlinestr[PATH_MAX] = {0,};
- ret = glusterfs_globals_init ();
- if (ret)
- return ret;
-
- ctx = glusterfs_ctx_get ();
+ ctx = glusterfs_ctx_new ();
if (!ctx) {
gf_log ("glusterfs", GF_LOG_CRITICAL,
"ERROR: glusterfs context not initialized");
return ENOMEM;
}
-#ifndef DEBUG
+ glusterfsd_ctx = ctx;
+
+#ifdef DEBUG
+ gf_mem_acct_enable_set (ctx);
+#else
/* Enable memory accounting on the fly based on argument */
- gf_check_and_set_mem_acct (argc, argv, ctx);
+ gf_check_and_set_mem_acct (ctx, argc, argv);
#endif
+
+ ret = glusterfs_globals_init (ctx);
+ if (ret)
+ return ret;
+
+ THIS->ctx = ctx;
+
ret = glusterfs_ctx_defaults_init (ctx);
if (ret)
goto out;
@@ -1641,14 +1922,23 @@ main (int argc, char *argv[])
if (ret)
goto out;
- ret = logging_init (ctx);
+ ret = logging_init (ctx, argv[0]);
if (ret)
goto out;
- /* log the version of glusterfs running here */
- gf_log (argv[0], GF_LOG_INFO,
- "Started running %s version %s",
- argv[0], PACKAGE_VERSION);
+ /* log the version of glusterfs running here along with the actual
+ command line options. */
+ {
+ int i = 0;
+ strcpy (cmdlinestr, argv[0]);
+ for (i = 1; i < argc; i++) {
+ strcat (cmdlinestr, " ");
+ strcat (cmdlinestr, argv[i]);
+ }
+ gf_log (argv[0], GF_LOG_INFO,
+ "Started running %s version %s (%s)",
+ argv[0], PACKAGE_VERSION, cmdlinestr);
+ }
gf_proc_dump_init();
@@ -1660,7 +1950,7 @@ main (int argc, char *argv[])
if (ret)
goto out;
- ctx->env = syncenv_new (0);
+ ctx->env = syncenv_new (0, 0, 0);
if (!ctx->env) {
gf_log ("", GF_LOG_ERROR,
"Could not create new sync-environment");
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 21f1cbb6c..9e2a0e56e 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERFSD_H__
#define __GLUSTERFSD_H__
@@ -30,8 +20,6 @@
#define DEFAULT_GLUSTERD_VOLFILE CONFDIR "/glusterd.vol"
#define DEFAULT_CLIENT_VOLFILE CONFDIR "/glusterfs.vol"
#define DEFAULT_SERVER_VOLFILE CONFDIR "/glusterfsd.vol"
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define DEFAULT_LOG_LEVEL GF_LOG_INFO
#define DEFAULT_EVENT_POOL_SIZE 16384
@@ -63,6 +51,7 @@ enum argp_option_keys {
ARGP_NO_DAEMON_KEY = 'N',
ARGP_RUN_ID_KEY = 'r',
ARGP_DEBUG_KEY = 133,
+ ARGP_NEGATIVE_TIMEOUT_KEY = 134,
ARGP_ENTRY_TIMEOUT_KEY = 135,
ARGP_ATTRIBUTE_TIMEOUT_KEY = 136,
ARGP_VOLUME_NAME_KEY = 137,
@@ -86,6 +75,15 @@ enum argp_option_keys {
ARGP_WORM_KEY = 155,
ARGP_USER_MAP_ROOT_KEY = 156,
ARGP_MEM_ACCOUNTING_KEY = 157,
+ ARGP_SELINUX_KEY = 158,
+ ARGP_FOPEN_KEEP_CACHE_KEY = 159,
+ ARGP_GID_TIMEOUT_KEY = 160,
+ ARGP_FUSE_BACKGROUND_QLEN_KEY = 161,
+ ARGP_FUSE_CONGESTION_THRESHOLD_KEY = 162,
+ ARGP_INODE32_KEY = 163,
+ ARGP_FUSE_MOUNTOPTS_KEY = 164,
+ ARGP_FUSE_USE_READDIRP_KEY = 165,
+ ARGP_AUX_GFID_MOUNT_KEY = 166,
};
struct _gfd_vol_top_priv_t {
@@ -104,6 +102,12 @@ int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
void cleanup_and_exit (int signum);
-void *glusterfs_volume_top_read_perf (void *args);
-void *glusterfs_volume_top_write_perf (void *args);
+int glusterfs_volume_top_write_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time);
+int glusterfs_volume_top_read_perf (uint32_t blk_size, uint32_t blk_count,
+ char *brick_path, double *throughput,
+ double *time);
+
+extern glusterfs_ctx_t *glusterfsd_ctx;
#endif /* __GLUSTERFSD_H__ */
diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in
new file mode 100644
index 000000000..d654280d0
--- /dev/null
+++ b/libgfchangelog.pc.in
@@ -0,0 +1,11 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+
+Name: libgfchangelog
+Description: GlusterFS Changelog Consumer Library
+Version: @VERSION@
+Libs: -L${libdir} -lgfchangelog -lglusterfs
+Cflags: -I${includedir}/glusterfs/gfchangelog -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 2dab8735c..907399ae6 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -1,10 +1,10 @@
-libglusterfs_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) \
- $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
+libglusterfs_la_CFLAGS = -Wall $(GF_CFLAGS) \
+ $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
-libglusterfs_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 \
- -D_GNU_SOURCE -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
- -D$(GF_HOST_OS) -I$(CONTRIBDIR)/rbtree \
- -DSCHEDULERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler\"
+libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree
libglusterfs_la_LIBADD = @LEXLIB@
@@ -15,36 +15,42 @@ CONTRIB_BUILDDIR = $(top_builddir)/contrib
libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c \
compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c \
- iobuf.c globals.c statedump.c stack.c checksum.c daemon.c \
- $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c \
+ iobuf.c globals.c statedump.c stack.c checksum.c daemon.c timespec.c \
+ $(CONTRIBDIR)/rbtree/rb.c rbthash.c store.c latency.c \
graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c \
$(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c \
$(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c \
$(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c \
$(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c \
- graph-print.c trie.c run.c options.c fd-lk.c circ-buff.c event-history.c
+ graph-print.c trie.c run.c options.c fd-lk.c circ-buff.c \
+ event-history.c gidcache.c ctx.c client_t.c event-poll.c event-epoll.c \
+ $(CONTRIBDIR)/libgen/basename_r.c $(CONTRIBDIR)/libgen/dirname_r.c \
+ $(CONTRIBDIR)/stdlib/gf_mkostemp.c
-nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c
+
+nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c gf-error-codes.h
BUILT_SOURCES = graph.lex.c
-noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h \
+noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h timespec.h \
logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h \
fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h \
gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h \
- checksum.h daemon.h $(CONTRIBDIR)/rbtree/rb.h \
+ checksum.h daemon.h $(CONTRIBDIR)/rbtree/rb.h store.h\
rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h \
$(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
- $(CONTRIB_BUILDDIR)/uuid/uuid_types.h syncop.h graph-utils.h trie.h run.h \
- options.h lkowner.h fd-lk.h circ-buff.h event-history.h
+ $(CONTRIB_BUILDDIR)/uuid/uuid_types.h syncop.h graph-utils.h trie.h \
+ run.h options.h lkowner.h fd-lk.h circ-buff.h event-history.h \
+ gidcache.h client_t.h glusterfs-acl.h
EXTRA_DIST = graph.l graph.y
graph.lex.c: graph.l y.tab.h
- $(LEX) -t $(srcdir)/graph.l > $@
+ $(LEX) -Pgraphyy -t $(srcdir)/graph.l > $@
+y.tab.c: y.tab.h
y.tab.h: graph.y
- $(YACC) -d $(srcdir)/graph.y
+ $(YACC) -p graphyy -d $(srcdir)/graph.y
CLEANFILES = graph.lex.c y.tab.c y.tab.h
CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c
index 85a1aaa7b..ac79cf071 100644
--- a/libglusterfs/src/call-stub.c
+++ b/libglusterfs/src/call-stub.c
@@ -37,15 +37,15 @@ stub_new (call_frame_t *frame,
new->fop = fop;
new->stub_mem_pool = frame->this->ctx->stub_mem_pool;
INIT_LIST_HEAD (&new->list);
+
+ INIT_LIST_HEAD (&new->args_cbk.entries);
out:
return new;
}
call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
+fop_lookup_stub (call_frame_t *frame, fop_lookup_t fn, loc_t *loc,
dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -56,11 +56,11 @@ fop_lookup_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_LOOKUP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.lookup.fn = fn;
+ stub->fn.lookup = fn;
- loc_copy (&stub->args.lookup.loc, loc);
+ loc_copy (&stub->args.loc, loc);
if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
@@ -68,14 +68,10 @@ out:
call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent)
+fop_lookup_cbk_stub (call_frame_t *frame, fop_lookup_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
call_stub_t *stub = NULL;
@@ -84,27 +80,24 @@ fop_lookup_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_LOOKUP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.lookup_cbk.fn = fn;
- stub->args.lookup_cbk.op_ret = op_ret;
- stub->args.lookup_cbk.op_errno = op_errno;
+ stub->fn_cbk.lookup = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (inode)
- stub->args.lookup_cbk.inode = inode_ref (inode);
+ stub->args_cbk.inode = inode_ref (inode);
if (buf)
- stub->args.lookup_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (postparent)
- stub->args.lookup_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
-
call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
+fop_stat_stub (call_frame_t *frame, fop_stat_t fn,
loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -115,21 +108,18 @@ fop_stat_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_STAT);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.stat.fn = fn;
- loc_copy (&stub->args.stat.loc, loc);
+ stub->fn.stat = fn;
+ loc_copy (&stub->args.loc, loc);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_stat_cbk_stub (call_frame_t *frame, fop_stat_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
struct iatt *buf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -139,22 +129,20 @@ fop_stat_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_STAT);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.stat_cbk.fn = fn;
- stub->args.stat_cbk.op_ret = op_ret;
- stub->args.stat_cbk.op_errno = op_errno;
+ stub->fn_cbk.stat = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (op_ret == 0)
- stub->args.stat_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
+fop_fstat_stub (call_frame_t *frame, fop_fstat_t fn,
fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -164,23 +152,20 @@ fop_fstat_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FSTAT);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fstat.fn = fn;
+ stub->fn.fstat = fn;
if (fd)
- stub->args.fstat.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_fstat_cbk_stub (call_frame_t *frame, fop_fstat_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
struct iatt *buf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -190,26 +175,21 @@ fop_fstat_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FSTAT);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fstat_cbk.fn = fn;
- stub->args.fstat_cbk.op_ret = op_ret;
- stub->args.fstat_cbk.op_errno = op_errno;
+ stub->fn_cbk.fstat = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (buf)
- stub->args.fstat_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
-/* truncate */
-
call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off, dict_t *xdata)
+fop_truncate_stub (call_frame_t *frame, fop_truncate_t fn,
+ loc_t *loc, off_t off, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -219,24 +199,20 @@ fop_truncate_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_TRUNCATE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.truncate.fn = fn;
- loc_copy (&stub->args.truncate.loc, loc);
- stub->args.truncate.off = off;
+ stub->fn.truncate = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.offset = off;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_truncate_cbk_stub (call_frame_t *frame, fop_truncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -245,26 +221,23 @@ fop_truncate_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_TRUNCATE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.truncate_cbk.fn = fn;
- stub->args.truncate_cbk.op_ret = op_ret;
- stub->args.truncate_cbk.op_errno = op_errno;
+ stub->fn_cbk.truncate = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (prebuf)
- stub->args.truncate_cbk.prebuf = *prebuf;
+ stub->args_cbk.prestat = *prebuf;
if (postbuf)
- stub->args.truncate_cbk.postbuf = *postbuf;
+ stub->args_cbk.poststat = *postbuf;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off, dict_t *xdata)
+fop_ftruncate_stub (call_frame_t *frame, fop_ftruncate_t fn,
+ fd_t *fd, off_t off, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -273,26 +246,22 @@ fop_ftruncate_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FTRUNCATE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.ftruncate.fn = fn;
+ stub->fn.ftruncate = fn;
if (fd)
- stub->args.ftruncate.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
- stub->args.ftruncate.off = off;
+ stub->args.offset = off;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_ftruncate_cbk_stub (call_frame_t *frame, fop_ftruncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -301,15 +270,15 @@ fop_ftruncate_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FTRUNCATE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.ftruncate_cbk.fn = fn;
- stub->args.ftruncate_cbk.op_ret = op_ret;
- stub->args.ftruncate_cbk.op_errno = op_errno;
+ stub->fn_cbk.ftruncate = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (prebuf)
- stub->args.ftruncate_cbk.prebuf = *prebuf;
+ stub->args_cbk.prestat = *prebuf;
if (postbuf)
- stub->args.ftruncate_cbk.postbuf = *postbuf;
+ stub->args_cbk.poststat = *postbuf;
if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
@@ -317,10 +286,8 @@ out:
call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask, dict_t *xdata)
+fop_access_stub (call_frame_t *frame, fop_access_t fn,
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -330,22 +297,19 @@ fop_access_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_ACCESS);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.access.fn = fn;
- loc_copy (&stub->args.access.loc, loc);
- stub->args.access.mask = mask;
+ stub->fn.access = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.mask = mask;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_access_cbk_stub (call_frame_t *frame, fop_access_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -354,22 +318,19 @@ fop_access_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_ACCESS);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.access_cbk.fn = fn;
- stub->args.access_cbk.op_ret = op_ret;
- stub->args.access_cbk.op_errno = op_errno;
+ stub->fn_cbk.access = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size, dict_t *xdata)
+fop_readlink_stub (call_frame_t *frame, fop_readlink_t fn,
+ loc_t *loc, size_t size, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -379,24 +340,20 @@ fop_readlink_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_READLINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readlink.fn = fn;
- loc_copy (&stub->args.readlink.loc, loc);
- stub->args.readlink.size = size;
+ stub->fn.readlink = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.size = size;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf, dict_t *xdata)
+fop_readlink_cbk_stub (call_frame_t *frame, fop_readlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ const char *path, struct iatt *stbuf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -405,16 +362,15 @@ fop_readlink_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_READLINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readlink_cbk.fn = fn;
- stub->args.readlink_cbk.op_ret = op_ret;
- stub->args.readlink_cbk.op_errno = op_errno;
+ stub->fn_cbk.readlink = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (path)
- stub->args.readlink_cbk.buf = gf_strdup (path);
- if (sbuf)
- stub->args.readlink_cbk.sbuf = *sbuf;
+ stub->args_cbk.buf = gf_strdup (path);
+ if (stbuf)
+ stub->args_cbk.stat = *stbuf;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -432,28 +388,23 @@ fop_mknod_stub (call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
stub = stub_new (frame, 1, GF_FOP_MKNOD);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mknod.fn = fn;
- loc_copy (&stub->args.mknod.loc, loc);
- stub->args.mknod.mode = mode;
- stub->args.mknod.rdev = rdev;
- stub->args.mknod.umask = umask;
+ stub->fn.mknod = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.mode = mode;
+ stub->args.rdev = rdev;
+ stub->args.umask = umask;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_mknod_cbk_stub (call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -462,19 +413,19 @@ fop_mknod_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_MKNOD);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mknod_cbk.fn = fn;
- stub->args.mknod_cbk.op_ret = op_ret;
- stub->args.mknod_cbk.op_errno = op_errno;
+ stub->fn_cbk.mknod = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (inode)
- stub->args.mknod_cbk.inode = inode_ref (inode);
+ stub->args_cbk.inode = inode_ref (inode);
if (buf)
- stub->args.mknod_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (preparent)
- stub->args.mknod_cbk.preparent = *preparent;
+ stub->args_cbk.preparent = *preparent;
if (postparent)
- stub->args.mknod_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
@@ -493,27 +444,22 @@ fop_mkdir_stub (call_frame_t *frame, fop_mkdir_t fn,
stub = stub_new (frame, 1, GF_FOP_MKDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mkdir.fn = fn;
- loc_copy (&stub->args.mkdir.loc, loc);
- stub->args.mkdir.mode = mode;
- stub->args.mkdir.umask = umask;
+ stub->fn.mkdir = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.mode = mode;
+ stub->args.umask = umask;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
+fop_mkdir_cbk_stub (call_frame_t *frame, fop_mkdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -523,28 +469,26 @@ fop_mkdir_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_MKDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.mkdir_cbk.fn = fn;
- stub->args.mkdir_cbk.op_ret = op_ret;
- stub->args.mkdir_cbk.op_errno = op_errno;
+ stub->fn_cbk.mkdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (inode)
- stub->args.mkdir_cbk.inode = inode_ref (inode);
+ stub->args_cbk.inode = inode_ref (inode);
if (buf)
- stub->args.mkdir_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (preparent)
- stub->args.mkdir_cbk.preparent = *preparent;
+ stub->args_cbk.preparent = *preparent;
if (postparent)
- stub->args.mkdir_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_unlink_stub (call_frame_t *frame,
- fop_unlink_t fn,
+fop_unlink_stub (call_frame_t *frame, fop_unlink_t fn,
loc_t *loc, int xflag, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -555,24 +499,21 @@ fop_unlink_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_UNLINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.unlink.fn = fn;
- loc_copy (&stub->args.unlink.loc, loc);
- stub->args.unlink.xflag = xflag;
+ stub->fn.unlink = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.xflag = xflag;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_unlink_cbk_stub (call_frame_t *frame, fop_unlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -581,16 +522,15 @@ fop_unlink_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_UNLINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.unlink_cbk.fn = fn;
- stub->args.unlink_cbk.op_ret = op_ret;
- stub->args.unlink_cbk.op_errno = op_errno;
+ stub->fn_cbk.unlink = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (preparent)
- stub->args.unlink_cbk.preparent = *preparent;
+ stub->args_cbk.preparent = *preparent;
if (postparent)
- stub->args.unlink_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -609,24 +549,21 @@ fop_rmdir_stub (call_frame_t *frame, fop_rmdir_t fn,
stub = stub_new (frame, 1, GF_FOP_RMDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rmdir.fn = fn;
- loc_copy (&stub->args.rmdir.loc, loc);
- stub->args.rmdir.flags = flags;
+ stub->fn.rmdir = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.flags = flags;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_rmdir_cbk_stub (call_frame_t *frame, fop_rmdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -635,16 +572,15 @@ fop_rmdir_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_RMDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rmdir_cbk.fn = fn;
- stub->args.rmdir_cbk.op_ret = op_ret;
- stub->args.rmdir_cbk.op_errno = op_errno;
+ stub->fn_cbk.rmdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (preparent)
- stub->args.rmdir_cbk.preparent = *preparent;
+ stub->args_cbk.preparent = *preparent;
if (postparent)
- stub->args.rmdir_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -663,27 +599,23 @@ fop_symlink_stub (call_frame_t *frame, fop_symlink_t fn,
stub = stub_new (frame, 1, GF_FOP_SYMLINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.symlink.fn = fn;
- stub->args.symlink.linkname = gf_strdup (linkname);
- stub->args.symlink.umask = umask;
- loc_copy (&stub->args.symlink.loc, loc);
+ stub->fn.symlink = fn;
+ stub->args.linkname = gf_strdup (linkname);
+ stub->args.umask = umask;
+ loc_copy (&stub->args.loc, loc);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_symlink_cbk_stub (call_frame_t *frame, fop_symlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -692,30 +624,27 @@ fop_symlink_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_SYMLINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.symlink_cbk.fn = fn;
- stub->args.symlink_cbk.op_ret = op_ret;
- stub->args.symlink_cbk.op_errno = op_errno;
+ stub->fn_cbk.symlink = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (inode)
- stub->args.symlink_cbk.inode = inode_ref (inode);
+ stub->args_cbk.inode = inode_ref (inode);
if (buf)
- stub->args.symlink_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (preparent)
- stub->args.symlink_cbk.preparent = *preparent;
+ stub->args_cbk.preparent = *preparent;
if (postparent)
- stub->args.symlink_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+fop_rename_stub (call_frame_t *frame, fop_rename_t fn,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -726,27 +655,22 @@ fop_rename_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_RENAME);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rename.fn = fn;
- loc_copy (&stub->args.rename.old, oldloc);
- loc_copy (&stub->args.rename.new, newloc);
+ stub->fn.rename = fn;
+ loc_copy (&stub->args.loc, oldloc);
+ loc_copy (&stub->args.loc2, newloc);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
+fop_rename_cbk_stub (call_frame_t *frame, fop_rename_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -755,32 +679,29 @@ fop_rename_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_RENAME);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rename_cbk.fn = fn;
- stub->args.rename_cbk.op_ret = op_ret;
- stub->args.rename_cbk.op_errno = op_errno;
+ stub->fn_cbk.rename = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (buf)
- stub->args.rename_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (preoldparent)
- stub->args.rename_cbk.preoldparent = *preoldparent;
+ stub->args_cbk.preparent = *preoldparent;
if (postoldparent)
- stub->args.rename_cbk.postoldparent = *postoldparent;
+ stub->args_cbk.postparent = *postoldparent;
if (prenewparent)
- stub->args.rename_cbk.prenewparent = *prenewparent;
+ stub->args_cbk.preparent2 = *prenewparent;
if (postnewparent)
- stub->args.rename_cbk.postnewparent = *postnewparent;
+ stub->args_cbk.postparent2 = *postnewparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+fop_link_stub (call_frame_t *frame, fop_link_t fn,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -791,27 +712,23 @@ fop_link_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_LINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.link.fn = fn;
- loc_copy (&stub->args.link.oldloc, oldloc);
- loc_copy (&stub->args.link.newloc, newloc);
+ stub->fn.link = fn;
+ loc_copy (&stub->args.loc, oldloc);
+ loc_copy (&stub->args.loc2, newloc);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_link_cbk_stub (call_frame_t *frame, fop_link_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -820,20 +737,19 @@ fop_link_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_LINK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.link_cbk.fn = fn;
- stub->args.link_cbk.op_ret = op_ret;
- stub->args.link_cbk.op_errno = op_errno;
+ stub->fn_cbk.link = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (inode)
- stub->args.link_cbk.inode = inode_ref (inode);
+ stub->args_cbk.inode = inode_ref (inode);
if (buf)
- stub->args.link_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (preparent)
- stub->args.link_cbk.preparent = *preparent;
+ stub->args_cbk.preparent = *preparent;
if (postparent)
- stub->args.link_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -852,31 +768,26 @@ fop_create_stub (call_frame_t *frame, fop_create_t fn,
stub = stub_new (frame, 1, GF_FOP_CREATE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.create.fn = fn;
- loc_copy (&stub->args.create.loc, loc);
- stub->args.create.flags = flags;
- stub->args.create.mode = mode;
- stub->args.create.umask = umask;
+ stub->fn.create = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.flags = flags;
+ stub->args.mode = mode;
+ stub->args.umask = umask;
if (fd)
- stub->args.create.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_create_cbk_stub (call_frame_t *frame, fop_create_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -885,33 +796,29 @@ fop_create_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_CREATE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.create_cbk.fn = fn;
- stub->args.create_cbk.op_ret = op_ret;
- stub->args.create_cbk.op_errno = op_errno;
+ stub->fn_cbk.create = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (fd)
- stub->args.create_cbk.fd = fd_ref (fd);
+ stub->args_cbk.fd = fd_ref (fd);
if (inode)
- stub->args.create_cbk.inode = inode_ref (inode);
+ stub->args_cbk.inode = inode_ref (inode);
if (buf)
- stub->args.create_cbk.buf = *buf;
+ stub->args_cbk.stat = *buf;
if (preparent)
- stub->args.create_cbk.preparent = *preparent;
+ stub->args_cbk.preparent = *preparent;
if (postparent)
- stub->args.create_cbk.postparent = *postparent;
+ stub->args_cbk.postparent = *postparent;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags, fd_t *fd,
- dict_t *xdata)
+fop_open_stub (call_frame_t *frame, fop_open_t fn,
+ loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -921,24 +828,21 @@ fop_open_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_OPEN);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.open.fn = fn;
- loc_copy (&stub->args.open.loc, loc);
- stub->args.open.flags = flags;
+ stub->fn.open = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.flags = flags;
if (fd)
- stub->args.open.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_open_cbk_stub (call_frame_t *frame, fop_open_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -948,25 +852,22 @@ fop_open_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_OPEN);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.open_cbk.fn = fn;
- stub->args.open_cbk.op_ret = op_ret;
- stub->args.open_cbk.op_errno = op_errno;
+ stub->fn_cbk.open = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (fd)
- stub->args.open_cbk.fd = fd_ref (fd);
+ stub->args_cbk.fd = fd_ref (fd);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off, uint32_t flags, dict_t *xdata)
+fop_readv_stub (call_frame_t *frame, fop_readv_t fn,
+ fd_t *fd, size_t size, off_t off, uint32_t flags,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -975,29 +876,24 @@ fop_readv_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_READ);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readv.fn = fn;
+ stub->fn.readv = fn;
if (fd)
- stub->args.readv.fd = fd_ref (fd);
- stub->args.readv.size = size;
- stub->args.readv.off = off;
- stub->args.readv.flags = flags;
+ stub->args.fd = fd_ref (fd);
+ stub->args.size = size;
+ stub->args.offset = off;
+ stub->args.flags = flags;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
+fop_readv_cbk_stub (call_frame_t *frame, fop_readv_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
struct iobref *iobref, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1007,31 +903,26 @@ fop_readv_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_READ);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readv_cbk.fn = fn;
- stub->args.readv_cbk.op_ret = op_ret;
- stub->args.readv_cbk.op_errno = op_errno;
+ stub->fn_cbk.readv = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (op_ret >= 0) {
- stub->args.readv_cbk.vector = iov_dup (vector, count);
- stub->args.readv_cbk.count = count;
- stub->args.readv_cbk.stbuf = *stbuf;
- stub->args.readv_cbk.iobref = iobref_ref (iobref);
+ stub->args_cbk.vector = iov_dup (vector, count);
+ stub->args_cbk.count = count;
+ stub->args_cbk.stat = *stbuf;
+ stub->args_cbk.iobref = iobref_ref (iobref);
}
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+fop_writev_stub (call_frame_t *frame, fop_writev_t fn,
+ fd_t *fd, struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1041,29 +932,25 @@ fop_writev_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_WRITE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.writev.fn = fn;
+ stub->fn.writev = fn;
if (fd)
- stub->args.writev.fd = fd_ref (fd);
- stub->args.writev.vector = iov_dup (vector, count);
- stub->args.writev.count = count;
- stub->args.writev.off = off;
- stub->args.writev.flags = flags;
- stub->args.writev.iobref = iobref_ref (iobref);
+ stub->args.fd = fd_ref (fd);
+ stub->args.vector = iov_dup (vector, count);
+ stub->args.count = count;
+ stub->args.offset = off;
+ stub->args.flags = flags;
+ stub->args.iobref = iobref_ref (iobref);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_writev_cbk_stub (call_frame_t *frame, fop_writev_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1072,25 +959,22 @@ fop_writev_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_WRITE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.writev_cbk.fn = fn;
- stub->args.writev_cbk.op_ret = op_ret;
- stub->args.writev_cbk.op_errno = op_errno;
+ stub->fn_cbk.writev = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (op_ret >= 0)
- stub->args.writev_cbk.postbuf = *postbuf;
+ stub->args_cbk.poststat = *postbuf;
if (prebuf)
- stub->args.writev_cbk.prebuf = *prebuf;
+ stub->args_cbk.prestat = *prebuf;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
-
call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
+fop_flush_stub (call_frame_t *frame, fop_flush_t fn,
fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1100,22 +984,19 @@ fop_flush_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FLUSH);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.flush.fn = fn;
+ stub->fn.flush = fn;
if (fd)
- stub->args.flush.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_flush_cbk_stub (call_frame_t *frame, fop_flush_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1124,24 +1005,19 @@ fop_flush_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FLUSH);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.flush_cbk.fn = fn;
- stub->args.flush_cbk.op_ret = op_ret;
- stub->args.flush_cbk.op_errno = op_errno;
+ stub->fn_cbk.flush = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
-
-
call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata)
+fop_fsync_stub (call_frame_t *frame, fop_fsync_t fn,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1150,25 +1026,21 @@ fop_fsync_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FSYNC);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsync.fn = fn;
+ stub->fn.fsync = fn;
if (fd)
- stub->args.fsync.fd = fd_ref (fd);
- stub->args.fsync.datasync = datasync;
+ stub->args.fd = fd_ref (fd);
+ stub->args.datasync = datasync;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_fsync_cbk_stub (call_frame_t *frame, fop_fsync_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1177,24 +1049,22 @@ fop_fsync_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FSYNC);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsync_cbk.fn = fn;
- stub->args.fsync_cbk.op_ret = op_ret;
- stub->args.fsync_cbk.op_errno = op_errno;
+ stub->fn_cbk.fsync = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (prebuf)
- stub->args.fsync_cbk.prebuf = *prebuf;
+ stub->args_cbk.prestat = *prebuf;
if (postbuf)
- stub->args.fsync_cbk.postbuf = *postbuf;
+ stub->args_cbk.poststat = *postbuf;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
+fop_opendir_stub (call_frame_t *frame, fop_opendir_t fn,
loc_t *loc, fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1205,23 +1075,20 @@ fop_opendir_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_OPENDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.opendir.fn = fn;
- loc_copy (&stub->args.opendir.loc, loc);
+ stub->fn.opendir = fn;
+ loc_copy (&stub->args.loc, loc);
if (fd)
- stub->args.opendir.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_opendir_cbk_stub (call_frame_t *frame, fop_opendir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
fd_t *fd, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1231,25 +1098,22 @@ fop_opendir_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_OPENDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.opendir_cbk.fn = fn;
- stub->args.opendir_cbk.op_ret = op_ret;
- stub->args.opendir_cbk.op_errno = op_errno;
+ stub->fn_cbk.opendir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (fd)
- stub->args.opendir_cbk.fd = fd_ref (fd);
+ stub->args_cbk.fd = fd_ref (fd);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata)
+fop_fsyncdir_stub (call_frame_t *frame, fop_fsyncdir_t fn,
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1258,23 +1122,20 @@ fop_fsyncdir_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FSYNCDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsyncdir.fn = fn;
+ stub->fn.fsyncdir = fn;
if (fd)
- stub->args.fsyncdir.fd = fd_ref (fd);
- stub->args.fsyncdir.datasync = datasync;
+ stub->args.fd = fd_ref (fd);
+ stub->args.datasync = datasync;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_fsyncdir_cbk_stub (call_frame_t *frame, fop_fsyncdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1283,20 +1144,18 @@ fop_fsyncdir_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FSYNCDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsyncdir_cbk.fn = fn;
- stub->args.fsyncdir_cbk.op_ret = op_ret;
- stub->args.fsyncdir_cbk.op_errno = op_errno;
+ stub->fn_cbk.fsyncdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
+fop_statfs_stub (call_frame_t *frame, fop_statfs_t fn,
loc_t *loc, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1307,21 +1166,18 @@ fop_statfs_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_STATFS);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.statfs.fn = fn;
- loc_copy (&stub->args.statfs.loc, loc);
+ stub->fn.statfs = fn;
+ loc_copy (&stub->args.loc, loc);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_statfs_cbk_stub (call_frame_t *frame, fop_statfs_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
struct statvfs *buf, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1331,24 +1187,21 @@ fop_statfs_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_STATFS);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.statfs_cbk.fn = fn;
- stub->args.statfs_cbk.op_ret = op_ret;
- stub->args.statfs_cbk.op_errno = op_errno;
+ stub->fn_cbk.statfs = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (op_ret == 0)
- stub->args.statfs_cbk.buf = *buf;
+ stub->args_cbk.statvfs = *buf;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
+fop_setxattr_stub (call_frame_t *frame, fop_setxattr_t fn,
+ loc_t *loc, dict_t *dict,
int32_t flags, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1359,15 +1212,14 @@ fop_setxattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_SETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.setxattr.fn = fn;
- loc_copy (&stub->args.setxattr.loc, loc);
+ stub->fn.setxattr = fn;
+ loc_copy (&stub->args.loc, loc);
/* TODO */
if (dict)
- stub->args.setxattr.dict = dict_ref (dict);
- stub->args.setxattr.flags = flags;
+ stub->args.xattr = dict_ref (dict);
+ stub->args.flags = flags;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -1386,21 +1238,19 @@ fop_setxattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_SETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.setxattr_cbk.fn = fn;
- stub->args.setxattr_cbk.op_ret = op_ret;
- stub->args.setxattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.setxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata)
+fop_getxattr_stub (call_frame_t *frame, fop_getxattr_t fn,
+ loc_t *loc, const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1410,24 +1260,21 @@ fop_getxattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_GETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.getxattr.fn = fn;
- loc_copy (&stub->args.getxattr.loc, loc);
+ stub->fn.getxattr = fn;
+ loc_copy (&stub->args.loc, loc);
if (name)
- stub->args.getxattr.name = gf_strdup (name);
+ stub->args.name = gf_strdup (name);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_getxattr_cbk_stub (call_frame_t *frame, fop_getxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
dict_t *dict, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1437,26 +1284,22 @@ fop_getxattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_GETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.getxattr_cbk.fn = fn;
- stub->args.getxattr_cbk.op_ret = op_ret;
- stub->args.getxattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.getxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
/* TODO */
if (dict)
- stub->args.getxattr_cbk.dict = dict_ref (dict);
+ stub->args_cbk.xattr = dict_ref (dict);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fsetxattr_stub (call_frame_t *frame,
- fop_fsetxattr_t fn,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata)
+fop_fsetxattr_stub (call_frame_t *frame, fop_fsetxattr_t fn,
+ fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1466,26 +1309,22 @@ fop_fsetxattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FSETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsetxattr.fn = fn;
- stub->args.fsetxattr.fd = fd_ref (fd);
+ stub->fn.fsetxattr = fn;
+ stub->args.fd = fd_ref (fd);
- /* TODO */
if (dict)
- stub->args.fsetxattr.dict = dict_ref (dict);
- stub->args.fsetxattr.flags = flags;
+ stub->args.xattr = dict_ref (dict);
+ stub->args.flags = flags;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fsetxattr_cbk_stub (call_frame_t *frame,
- fop_fsetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_fsetxattr_cbk_stub (call_frame_t *frame, fop_fsetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1494,22 +1333,19 @@ fop_fsetxattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FSETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsetxattr_cbk.fn = fn;
- stub->args.fsetxattr_cbk.op_ret = op_ret;
- stub->args.fsetxattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.fsetxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fgetxattr_stub (call_frame_t *frame,
- fop_fgetxattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata)
+fop_fgetxattr_stub (call_frame_t *frame, fop_fgetxattr_t fn,
+ fd_t *fd, const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1519,24 +1355,21 @@ fop_fgetxattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FGETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fgetxattr.fn = fn;
- stub->args.fgetxattr.fd = fd_ref (fd);
+ stub->fn.fgetxattr = fn;
+ stub->args.fd = fd_ref (fd);
if (name)
- stub->args.fgetxattr.name = gf_strdup (name);
+ stub->args.name = gf_strdup (name);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fgetxattr_cbk_stub (call_frame_t *frame,
- fop_fgetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_fgetxattr_cbk_stub (call_frame_t *frame, fop_fgetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
dict_t *dict, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1546,26 +1379,22 @@ fop_fgetxattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_GETXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fgetxattr_cbk.fn = fn;
- stub->args.fgetxattr_cbk.op_ret = op_ret;
- stub->args.fgetxattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.fgetxattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- /* TODO */
if (dict)
- stub->args.fgetxattr_cbk.dict = dict_ref (dict);
+ stub->args_cbk.xattr = dict_ref (dict);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata)
+fop_removexattr_stub (call_frame_t *frame, fop_removexattr_t fn,
+ loc_t *loc, const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1576,22 +1405,19 @@ fop_removexattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_REMOVEXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.removexattr.fn = fn;
- loc_copy (&stub->args.removexattr.loc, loc);
- stub->args.removexattr.name = gf_strdup (name);
+ stub->fn.removexattr = fn;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.name = gf_strdup (name);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_removexattr_cbk_stub (call_frame_t *frame, fop_removexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1600,21 +1426,19 @@ fop_removexattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_REMOVEXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.removexattr_cbk.fn = fn;
- stub->args.removexattr_cbk.op_ret = op_ret;
- stub->args.removexattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.removexattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_fremovexattr_stub (call_frame_t *frame,
- fop_fremovexattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata)
+fop_fremovexattr_stub (call_frame_t *frame, fop_fremovexattr_t fn,
+ fd_t *fd, const char *name, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1625,22 +1449,19 @@ fop_fremovexattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FREMOVEXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fremovexattr.fn = fn;
- stub->args.fremovexattr.fd = fd_ref (fd);
- stub->args.fremovexattr.name = gf_strdup (name);
+ stub->fn.fremovexattr = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.name = gf_strdup (name);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fremovexattr_cbk_stub (call_frame_t *frame,
- fop_fremovexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_fremovexattr_cbk_stub (call_frame_t *frame, fop_fremovexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1649,21 +1470,19 @@ fop_fremovexattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FREMOVEXATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fremovexattr_cbk.fn = fn;
- stub->args.fremovexattr_cbk.op_ret = op_ret;
- stub->args.fremovexattr_cbk.op_errno = op_errno;
+ stub->fn_cbk.fremovexattr = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
+fop_lk_stub (call_frame_t *frame, fop_lk_t fn,
+ fd_t *fd, int32_t cmd,
struct gf_flock *lock, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1674,24 +1493,21 @@ fop_lk_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_LK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.lk.fn = fn;
+ stub->fn.lk = fn;
if (fd)
- stub->args.lk.fd = fd_ref (fd);
- stub->args.lk.cmd = cmd;
- stub->args.lk.lock = *lock;
+ stub->args.fd = fd_ref (fd);
+ stub->args.cmd = cmd;
+ stub->args.lock = *lock;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_lk_cbk_stub (call_frame_t *frame, fop_lk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
struct gf_flock *lock, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1701,18 +1517,18 @@ fop_lk_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_LK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.lk_cbk.fn = fn;
- stub->args.lk_cbk.op_ret = op_ret;
- stub->args.lk_cbk.op_errno = op_errno;
+ stub->fn_cbk.lk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (op_ret == 0)
- stub->args.lk_cbk.lock = *lock;
+ stub->args_cbk.lock = *lock;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
const char *volume, loc_t *loc, int32_t cmd,
@@ -1726,21 +1542,21 @@ fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
stub = stub_new (frame, 1, GF_FOP_INODELK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.inodelk.fn = fn;
+ stub->fn.inodelk = fn;
if (volume)
- stub->args.inodelk.volume = gf_strdup (volume);
+ stub->args.volume = gf_strdup (volume);
- loc_copy (&stub->args.inodelk.loc, loc);
- stub->args.inodelk.cmd = cmd;
- stub->args.inodelk.lock = *lock;
+ loc_copy (&stub->args.loc, loc);
+ stub->args.cmd = cmd;
+ stub->args.lock = *lock;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -1752,13 +1568,12 @@ fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
stub = stub_new (frame, 0, GF_FOP_INODELK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.inodelk_cbk.fn = fn;
- stub->args.inodelk_cbk.op_ret = op_ret;
- stub->args.inodelk_cbk.op_errno = op_errno;
+ stub->fn_cbk.inodelk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -1777,20 +1592,19 @@ fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
stub = stub_new (frame, 1, GF_FOP_FINODELK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.finodelk.fn = fn;
+ stub->fn.finodelk = fn;
if (fd)
- stub->args.finodelk.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
if (volume)
- stub->args.finodelk.volume = gf_strdup (volume);
+ stub->args.volume = gf_strdup (volume);
- stub->args.finodelk.cmd = cmd;
- stub->args.finodelk.lock = *lock;
+ stub->args.cmd = cmd;
+ stub->args.lock = *lock;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -1807,13 +1621,12 @@ fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
stub = stub_new (frame, 0, GF_FOP_FINODELK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.finodelk_cbk.fn = fn;
- stub->args.finodelk_cbk.op_ret = op_ret;
- stub->args.finodelk_cbk.op_errno = op_errno;
+ stub->fn_cbk.finodelk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -1831,25 +1644,26 @@ fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
stub = stub_new (frame, 1, GF_FOP_ENTRYLK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.entrylk.fn = fn;
+ stub->fn.entrylk = fn;
if (volume)
- stub->args.entrylk.volume = gf_strdup (volume);
+ stub->args.volume = gf_strdup (volume);
+
+ loc_copy (&stub->args.loc, loc);
- loc_copy (&stub->args.entrylk.loc, loc);
+ stub->args.entrylkcmd = cmd;
+ stub->args.entrylktype = type;
- stub->args.entrylk.cmd = cmd;
- stub->args.entrylk.type = type;
if (name)
- stub->args.entrylk.name = gf_strdup (name);
+ stub->args.name = gf_strdup (name);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -1861,13 +1675,12 @@ fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
stub = stub_new (frame, 0, GF_FOP_ENTRYLK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.entrylk_cbk.fn = fn;
- stub->args.entrylk_cbk.op_ret = op_ret;
- stub->args.entrylk_cbk.op_errno = op_errno;
+ stub->fn_cbk.entrylk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
@@ -1885,25 +1698,25 @@ fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
stub = stub_new (frame, 1, GF_FOP_FENTRYLK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fentrylk.fn = fn;
+ stub->fn.fentrylk = fn;
if (volume)
- stub->args.fentrylk.volume = gf_strdup (volume);
+ stub->args.volume = gf_strdup (volume);
if (fd)
- stub->args.fentrylk.fd = fd_ref (fd);
- stub->args.fentrylk.cmd = cmd;
- stub->args.fentrylk.type = type;
+ stub->args.fd = fd_ref (fd);
+ stub->args.entrylkcmd = cmd;
+ stub->args.entrylktype = type;
if (name)
- stub->args.fentrylk.name = gf_strdup (name);
+ stub->args.name = gf_strdup (name);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
fop_fentrylk_cbk_stub (call_frame_t *frame, fop_fentrylk_cbk_t fn,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
@@ -1915,23 +1728,20 @@ fop_fentrylk_cbk_stub (call_frame_t *frame, fop_fentrylk_cbk_t fn,
stub = stub_new (frame, 0, GF_FOP_FENTRYLK);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fentrylk_cbk.fn = fn;
- stub->args.fentrylk_cbk.op_ret = op_ret;
- stub->args.fentrylk_cbk.op_errno = op_errno;
+ stub->fn_cbk.fentrylk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_readdirp_cbk_stub (call_frame_t *frame,
- fop_readdirp_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_readdirp_cbk_stub (call_frame_t *frame, fop_readdirp_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
gf_dirent_t *entries, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1942,14 +1752,10 @@ fop_readdirp_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_READDIRP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readdirp_cbk.fn = fn;
- stub->args.readdirp_cbk.op_ret = op_ret;
- stub->args.readdirp_cbk.op_errno = op_errno;
- INIT_LIST_HEAD (&stub->args.readdirp_cbk.entries.list);
+ stub->fn_cbk.readdirp = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- /* This check must come after the init of head above
- * so we're sure the list is empty for list_empty.
- */
GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
if (op_ret > 0) {
@@ -1960,23 +1766,22 @@ fop_readdirp_cbk_stub (call_frame_t *frame,
stub_entry->d_off = entry->d_off;
stub_entry->d_ino = entry->d_ino;
stub_entry->d_stat = entry->d_stat;
+ if (entry->inode)
+ stub_entry->inode = inode_ref (entry->inode);
list_add_tail (&stub_entry->list,
- &stub->args.readdirp_cbk.entries.list);
+ &stub->args_cbk.entries.list);
}
}
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_readdir_cbk_stub (call_frame_t *frame, fop_readdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
gf_dirent_t *entries, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -1987,14 +1792,10 @@ fop_readdir_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_READDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readdir_cbk.fn = fn;
- stub->args.readdir_cbk.op_ret = op_ret;
- stub->args.readdir_cbk.op_errno = op_errno;
- INIT_LIST_HEAD (&stub->args.readdir_cbk.entries.list);
+ stub->fn_cbk.readdir = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- /* This check must come after the init of head above
- * so we're sure the list is empty for list_empty.
- */
GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
if (op_ret > 0) {
@@ -2006,21 +1807,19 @@ fop_readdir_cbk_stub (call_frame_t *frame,
stub_entry->d_ino = entry->d_ino;
list_add_tail (&stub_entry->list,
- &stub->args.readdir_cbk.entries.list);
+ &stub->args_cbk.entries.list);
}
}
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
+fop_readdir_stub (call_frame_t *frame, fop_readdir_t fn,
+ fd_t *fd, size_t size,
off_t off, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2028,47 +1827,41 @@ fop_readdir_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_READDIR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readdir.fn = fn;
- stub->args.readdir.fd = fd_ref (fd);
- stub->args.readdir.size = size;
- stub->args.readdir.off = off;
+ stub->fn.readdir = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.size = size;
+ stub->args.offset = off;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_readdirp_stub (call_frame_t *frame,
- fop_readdirp_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- dict_t *xdata)
+fop_readdirp_stub (call_frame_t *frame, fop_readdirp_t fn,
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
{
call_stub_t *stub = NULL;
stub = stub_new (frame, 1, GF_FOP_READDIRP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.readdirp.fn = fn;
- stub->args.readdirp.fd = fd_ref (fd);
- stub->args.readdirp.size = size;
- stub->args.readdirp.off = off;
+ stub->fn.readdirp = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.size = size;
+ stub->args.offset = off;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_rchecksum_stub (call_frame_t *frame,
- fop_rchecksum_t fn,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata)
+fop_rchecksum_stub (call_frame_t *frame, fop_rchecksum_t fn,
+ fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2078,25 +1871,22 @@ fop_rchecksum_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_RCHECKSUM);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rchecksum.fn = fn;
- stub->args.rchecksum.fd = fd_ref (fd);
- stub->args.rchecksum.offset = offset;
- stub->args.rchecksum.len = len;
+ stub->fn.rchecksum = fn;
+ stub->args.fd = fd_ref (fd);
+ stub->args.offset = offset;
+ stub->args.size = len;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_rchecksum_cbk_stub (call_frame_t *frame,
- fop_rchecksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata)
+fop_rchecksum_cbk_stub (call_frame_t *frame, fop_rchecksum_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2105,31 +1895,27 @@ fop_rchecksum_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_RCHECKSUM);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.rchecksum_cbk.fn = fn;
- stub->args.rchecksum_cbk.op_ret = op_ret;
- stub->args.rchecksum_cbk.op_errno = op_errno;
+ stub->fn_cbk.rchecksum = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- if (op_ret >= 0)
- {
- stub->args.rchecksum_cbk.weak_checksum =
+ if (op_ret >= 0) {
+ stub->args_cbk.weak_checksum =
weak_checksum;
-
- stub->args.rchecksum_cbk.strong_checksum =
+ stub->args_cbk.strong_checksum =
memdup (strong_checksum, MD5_DIGEST_LENGTH);
}
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_xattrop_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_xattrop_cbk_stub (call_frame_t *frame, fop_xattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2138,23 +1924,20 @@ fop_xattrop_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_XATTROP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.xattrop_cbk.fn = fn;
- stub->args.xattrop_cbk.op_ret = op_ret;
- stub->args.xattrop_cbk.op_errno = op_errno;
+ stub->fn_cbk.xattrop = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_fxattrop_cbk_stub (call_frame_t *frame,
- fop_fxattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
+fop_fxattrop_cbk_stub (call_frame_t *frame, fop_fxattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
dict_t *xattr, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2163,25 +1946,22 @@ fop_fxattrop_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FXATTROP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fxattrop_cbk.fn = fn;
- stub->args.fxattrop_cbk.op_ret = op_ret;
- stub->args.fxattrop_cbk.op_errno = op_errno;
+ stub->fn_cbk.fxattrop = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (xattr)
- stub->args.fxattrop_cbk.xattr = dict_ref (xattr);
+ stub->args_cbk.xattr = dict_ref (xattr);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_xattrop_stub (call_frame_t *frame,
- fop_xattrop_t fn,
- loc_t *loc,
- gf_xattrop_flags_t optype,
+fop_xattrop_stub (call_frame_t *frame, fop_xattrop_t fn,
+ loc_t *loc, gf_xattrop_flags_t optype,
dict_t *xattr, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2192,25 +1972,23 @@ fop_xattrop_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_XATTROP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.xattrop.fn = fn;
+ stub->fn.xattrop = fn;
- loc_copy (&stub->args.xattrop.loc, loc);
+ loc_copy (&stub->args.loc, loc);
- stub->args.xattrop.optype = optype;
- stub->args.xattrop.xattr = dict_ref (xattr);
+ stub->args.optype = optype;
+ stub->args.xattr = dict_ref (xattr);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_fxattrop_stub (call_frame_t *frame,
- fop_fxattrop_t fn,
- fd_t *fd,
- gf_xattrop_flags_t optype,
+fop_fxattrop_stub (call_frame_t *frame, fop_fxattrop_t fn,
+ fd_t *fd, gf_xattrop_flags_t optype,
dict_t *xattr, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2221,28 +1999,25 @@ fop_fxattrop_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FXATTROP);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fxattrop.fn = fn;
+ stub->fn.fxattrop = fn;
- stub->args.fxattrop.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
- stub->args.fxattrop.optype = optype;
- stub->args.fxattrop.xattr = dict_ref (xattr);
+ stub->args.optype = optype;
+ stub->args.xattr = dict_ref (xattr);
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
call_stub_t *
-fop_setattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+fop_setattr_cbk_stub (call_frame_t *frame, fop_setattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2251,30 +2026,28 @@ fop_setattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_SETATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.setattr_cbk.fn = fn;
+ stub->fn_cbk.setattr = fn;
- stub->args.setattr_cbk.op_ret = op_ret;
- stub->args.setattr_cbk.op_errno = op_errno;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (statpre)
- stub->args.setattr_cbk.statpre = *statpre;
+ stub->args_cbk.prestat = *statpre;
if (statpost)
- stub->args.setattr_cbk.statpost = *statpost;
+ stub->args_cbk.poststat = *statpost;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_fsetattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+fop_fsetattr_cbk_stub (call_frame_t *frame, fop_setattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2283,27 +2056,25 @@ fop_fsetattr_cbk_stub (call_frame_t *frame,
stub = stub_new (frame, 0, GF_FOP_FSETATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsetattr_cbk.fn = fn;
+ stub->fn_cbk.fsetattr = fn;
- stub->args.fsetattr_cbk.op_ret = op_ret;
- stub->args.fsetattr_cbk.op_errno = op_errno;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
if (statpre)
- stub->args.setattr_cbk.statpre = *statpre;
+ stub->args_cbk.prestat = *statpre;
if (statpost)
- stub->args.fsetattr_cbk.statpost = *statpost;
+ stub->args_cbk.poststat = *statpost;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args_cbk.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_setattr_stub (call_frame_t *frame,
- fop_setattr_t fn,
- loc_t *loc,
- struct iatt *stbuf,
+fop_setattr_stub (call_frame_t *frame, fop_setattr_t fn,
+ loc_t *loc, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2314,27 +2085,25 @@ fop_setattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_SETATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.setattr.fn = fn;
+ stub->fn.setattr = fn;
- loc_copy (&stub->args.setattr.loc, loc);
+ loc_copy (&stub->args.loc, loc);
if (stbuf)
- stub->args.setattr.stbuf = *stbuf;
+ stub->args.stat = *stbuf;
- stub->args.setattr.valid = valid;
+ stub->args.valid = valid;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
+
call_stub_t *
-fop_fsetattr_stub (call_frame_t *frame,
- fop_fsetattr_t fn,
- fd_t *fd,
- struct iatt *stbuf,
+fop_fsetattr_stub (call_frame_t *frame, fop_fsetattr_t fn,
+ fd_t *fd, struct iatt *stbuf,
int32_t valid, dict_t *xdata)
{
call_stub_t *stub = NULL;
@@ -2345,1775 +2114,695 @@ fop_fsetattr_stub (call_frame_t *frame,
stub = stub_new (frame, 1, GF_FOP_FSETATTR);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- stub->args.fsetattr.fn = fn;
+ stub->fn.fsetattr = fn;
if (fd)
- stub->args.fsetattr.fd = fd_ref (fd);
+ stub->args.fd = fd_ref (fd);
if (stbuf)
- stub->args.fsetattr.stbuf = *stbuf;
+ stub->args.stat = *stbuf;
- stub->args.fsetattr.valid = valid;
+ stub->args.valid = valid;
if (xdata)
- stub->xdata = dict_ref (xdata);
-
+ stub->args.xdata = dict_ref (xdata);
out:
return stub;
}
-static void
-call_resume_wind (call_stub_t *stub)
+call_stub_t *
+fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+
+ stub = stub_new (frame, 0, GF_FOP_FALLOCATE);
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- stub->args.open.fn (stub->frame,
- stub->frame->this,
- &stub->args.open.loc,
- stub->args.open.flags, stub->args.open.fd,
- stub->xdata);
- break;
- }
- case GF_FOP_CREATE:
- {
- stub->args.create.fn (stub->frame,
- stub->frame->this,
- &stub->args.create.loc,
- stub->args.create.flags,
- stub->args.create.mode,
- stub->args.create.umask,
- stub->args.create.fd,
- stub->xdata);
- break;
- }
- case GF_FOP_STAT:
- {
- stub->args.stat.fn (stub->frame,
- stub->frame->this,
- &stub->args.stat.loc, stub->xdata);
- break;
- }
- case GF_FOP_READLINK:
- {
- stub->args.readlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.readlink.loc,
- stub->args.readlink.size, stub->xdata);
- break;
- }
+ stub->fn_cbk.fallocate = fn;
- case GF_FOP_MKNOD:
- {
- stub->args.mknod.fn (stub->frame, stub->frame->this,
- &stub->args.mknod.loc,
- stub->args.mknod.mode,
- stub->args.mknod.rdev,
- stub->args.mknod.umask, stub->xdata);
- }
- break;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- case GF_FOP_MKDIR:
- {
- stub->args.mkdir.fn (stub->frame, stub->frame->this,
- &stub->args.mkdir.loc,
- stub->args.mkdir.mode,
- stub->args.mkdir.umask, stub->xdata);
- }
- break;
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
+}
- case GF_FOP_UNLINK:
- {
- stub->args.unlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.unlink.loc,
- stub->args.unlink.xflag, stub->xdata);
- }
- break;
+call_stub_t *
+fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_RMDIR:
- {
- stub->args.rmdir.fn (stub->frame, stub->frame->this,
- &stub->args.rmdir.loc,
- stub->args.rmdir.flags, stub->xdata);
- }
- break;
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
- case GF_FOP_SYMLINK:
- {
- stub->args.symlink.fn (stub->frame,
- stub->frame->this,
- stub->args.symlink.linkname,
- &stub->args.symlink.loc,
- stub->args.symlink.umask, stub->xdata);
- }
- break;
+ stub = stub_new (frame, 1, GF_FOP_FALLOCATE);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- case GF_FOP_RENAME:
- {
- stub->args.rename.fn (stub->frame,
- stub->frame->this,
- &stub->args.rename.old,
- &stub->args.rename.new, stub->xdata);
- }
- break;
+ stub->fn.fallocate = fn;
- case GF_FOP_LINK:
- {
- stub->args.link.fn (stub->frame,
- stub->frame->this,
- &stub->args.link.oldloc,
- &stub->args.link.newloc, stub->xdata);
- }
- break;
+ if (fd)
+ stub->args.fd = fd_ref (fd);
- case GF_FOP_TRUNCATE:
- {
- stub->args.truncate.fn (stub->frame,
- stub->frame->this,
- &stub->args.truncate.loc,
- stub->args.truncate.off, stub->xdata);
- break;
- }
+ stub->args.flags = mode;
+ stub->args.offset = offset;
+ stub->args.size = len;
- case GF_FOP_READ:
- {
- stub->args.readv.fn (stub->frame,
- stub->frame->this,
- stub->args.readv.fd,
- stub->args.readv.size,
- stub->args.readv.off,
- stub->args.readv.flags, stub->xdata);
- break;
- }
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
- case GF_FOP_WRITE:
- {
- stub->args.writev.fn (stub->frame,
- stub->frame->this,
- stub->args.writev.fd,
- stub->args.writev.vector,
- stub->args.writev.count,
- stub->args.writev.off,
- stub->args.writev.flags,
- stub->args.writev.iobref, stub->xdata);
- break;
- }
+}
- case GF_FOP_STATFS:
- {
- stub->args.statfs.fn (stub->frame,
- stub->frame->this,
- &stub->args.statfs.loc, stub->xdata);
- break;
- }
- case GF_FOP_FLUSH:
- {
- stub->args.flush.fn (stub->frame,
- stub->frame->this,
- stub->args.flush.fd, stub->xdata);
- break;
- }
+call_stub_t *
+fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_FSYNC:
- {
- stub->args.fsync.fn (stub->frame,
- stub->frame->this,
- stub->args.fsync.fd,
- stub->args.fsync.datasync, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- case GF_FOP_SETXATTR:
- {
- stub->args.setxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.setxattr.loc,
- stub->args.setxattr.dict,
- stub->args.setxattr.flags, stub->xdata);
- break;
- }
+ stub = stub_new (frame, 0, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- case GF_FOP_GETXATTR:
- {
- stub->args.getxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.getxattr.loc,
- stub->args.getxattr.name, stub->xdata);
- break;
- }
+ stub->fn_cbk.discard = fn;
- case GF_FOP_FSETXATTR:
- {
- stub->args.fsetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fsetxattr.fd,
- stub->args.fsetxattr.dict,
- stub->args.fsetxattr.flags, stub->xdata);
- break;
- }
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- case GF_FOP_FGETXATTR:
- {
- stub->args.fgetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fgetxattr.fd,
- stub->args.fgetxattr.name, stub->xdata);
- break;
- }
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
+}
- case GF_FOP_REMOVEXATTR:
- {
- stub->args.removexattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.removexattr.loc,
- stub->args.removexattr.name, stub->xdata);
- break;
- }
+call_stub_t *
+fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_FREMOVEXATTR:
- {
- stub->args.fremovexattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fremovexattr.fd,
- stub->args.fremovexattr.name, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
- case GF_FOP_OPENDIR:
- {
- stub->args.opendir.fn (stub->frame,
- stub->frame->this,
- &stub->args.opendir.loc,
- stub->args.opendir.fd, stub->xdata);
- break;
- }
+ stub = stub_new (frame, 1, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- case GF_FOP_FSYNCDIR:
- {
- stub->args.fsyncdir.fn (stub->frame,
- stub->frame->this,
- stub->args.fsyncdir.fd,
- stub->args.fsyncdir.datasync, stub->xdata);
- break;
- }
+ stub->fn.discard = fn;
- case GF_FOP_ACCESS:
- {
- stub->args.access.fn (stub->frame,
- stub->frame->this,
- &stub->args.access.loc,
- stub->args.access.mask, stub->xdata);
- break;
- }
+ if (fd)
+ stub->args.fd = fd_ref (fd);
- case GF_FOP_FTRUNCATE:
- {
- stub->args.ftruncate.fn (stub->frame,
- stub->frame->this,
- stub->args.ftruncate.fd,
- stub->args.ftruncate.off, stub->xdata);
- break;
- }
+ stub->args.offset = offset;
+ stub->args.size = len;
- case GF_FOP_FSTAT:
- {
- stub->args.fstat.fn (stub->frame,
- stub->frame->this,
- stub->args.fstat.fd, stub->xdata);
- break;
- }
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
+out:
+ return stub;
- case GF_FOP_LK:
- {
- stub->args.lk.fn (stub->frame,
- stub->frame->this,
- stub->args.lk.fd,
- stub->args.lk.cmd,
- &stub->args.lk.lock, stub->xdata);
- break;
- }
+}
- case GF_FOP_INODELK:
- {
- stub->args.inodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.inodelk.volume,
- &stub->args.inodelk.loc,
- stub->args.inodelk.cmd,
- &stub->args.inodelk.lock, stub->xdata);
- break;
- }
+call_stub_t *
+fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_FINODELK:
- {
- stub->args.finodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.finodelk.volume,
- stub->args.finodelk.fd,
- stub->args.finodelk.cmd,
- &stub->args.finodelk.lock, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- case GF_FOP_ENTRYLK:
- {
- stub->args.entrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.entrylk.volume,
- &stub->args.entrylk.loc,
- stub->args.entrylk.name,
- stub->args.entrylk.cmd,
- stub->args.entrylk.type, stub->xdata);
- break;
- }
+ stub = stub_new (frame, 0, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- case GF_FOP_FENTRYLK:
- {
- stub->args.fentrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.fentrylk.volume,
- stub->args.fentrylk.fd,
- stub->args.fentrylk.name,
- stub->args.fentrylk.cmd,
- stub->args.fentrylk.type, stub->xdata);
- break;
- }
+ stub->fn_cbk.zerofill = fn;
- break;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- case GF_FOP_LOOKUP:
- {
- stub->args.lookup.fn (stub->frame,
- stub->frame->this,
- &stub->args.lookup.loc,
- stub->xdata);
- break;
- }
+ if (statpre)
+ stub->args_cbk.prestat = *statpre;
+ if (statpost)
+ stub->args_cbk.poststat = *statpost;
+ if (xdata)
+ stub->args_cbk.xdata = dict_ref (xdata);
+out:
+ return stub;
+}
- case GF_FOP_RCHECKSUM:
- {
- stub->args.rchecksum.fn (stub->frame,
- stub->frame->this,
- stub->args.rchecksum.fd,
- stub->args.rchecksum.offset,
- stub->args.rchecksum.len, stub->xdata);
- break;
- }
+call_stub_t *
+fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_READDIR:
- {
- stub->args.readdir.fn (stub->frame,
- stub->frame->this,
- stub->args.readdir.fd,
- stub->args.readdir.size,
- stub->args.readdir.off, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
- case GF_FOP_READDIRP:
- {
- stub->args.readdirp.fn (stub->frame,
- stub->frame->this,
- stub->args.readdirp.fd,
- stub->args.readdirp.size,
- stub->args.readdirp.off,
- stub->xdata);
- break;
- }
+ stub = stub_new (frame, 1, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- case GF_FOP_XATTROP:
- {
- stub->args.xattrop.fn (stub->frame,
- stub->frame->this,
- &stub->args.xattrop.loc,
- stub->args.xattrop.optype,
- stub->args.xattrop.xattr, stub->xdata);
+ stub->fn.zerofill = fn;
- break;
- }
- case GF_FOP_FXATTROP:
- {
- stub->args.fxattrop.fn (stub->frame,
- stub->frame->this,
- stub->args.fxattrop.fd,
- stub->args.fxattrop.optype,
- stub->args.fxattrop.xattr, stub->xdata);
+ if (fd)
+ stub->args.fd = fd_ref (fd);
- break;
- }
- case GF_FOP_SETATTR:
- {
- stub->args.setattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.setattr.loc,
- &stub->args.setattr.stbuf,
- stub->args.setattr.valid, stub->xdata);
- break;
- }
- case GF_FOP_FSETATTR:
- {
- stub->args.fsetattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fsetattr.fd,
- &stub->args.fsetattr.stbuf,
- stub->args.fsetattr.valid, stub->xdata);
- break;
- }
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
+ stub->args.offset = offset;
+ stub->args.size = len;
- }
+ if (xdata)
+ stub->args.xdata = dict_ref (xdata);
out:
- return;
-}
+ return stub;
+}
static void
-call_resume_unwind (call_stub_t *stub)
+call_resume_wind (call_stub_t *stub)
{
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
switch (stub->fop) {
case GF_FOP_OPEN:
- {
- if (!stub->args.open_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd, stub->xdata);
- else
- stub->args.open_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd, stub->xdata);
+ stub->fn.open (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.flags,
+ stub->args.fd, stub->args.xdata);
break;
- }
-
case GF_FOP_CREATE:
- {
- if (!stub->args.create_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf,
- &stub->args.create_cbk.preparent,
- &stub->args.create_cbk.postparent, stub->xdata);
- else
- stub->args.create_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf,
- &stub->args.create_cbk.preparent,
- &stub->args.create_cbk.postparent, stub->xdata);
-
+ stub->fn.create (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.flags,
+ stub->args.mode, stub->args.umask,
+ stub->args.fd, stub->args.xdata);
break;
- }
-
case GF_FOP_STAT:
- {
- if (!stub->args.stat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf, stub->xdata);
- else
- stub->args.stat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf, stub->xdata);
-
+ stub->fn.stat (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
break;
- }
-
case GF_FOP_READLINK:
- {
- if (!stub->args.readlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf,
- &stub->args.readlink_cbk.sbuf, stub->xdata);
- else
- stub->args.readlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf,
- &stub->args.readlink_cbk.sbuf, stub->xdata);
-
+ stub->fn.readlink (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.size,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_MKNOD:
- {
- if (!stub->args.mknod_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf,
- &stub->args.mknod_cbk.preparent,
- &stub->args.mknod_cbk.postparent, stub->xdata);
- else
- stub->args.mknod_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf,
- &stub->args.mknod_cbk.preparent,
- &stub->args.mknod_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.mknod (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.mode,
+ stub->args.rdev, stub->args.umask,
+ stub->args.xdata);
+ break;
case GF_FOP_MKDIR:
- {
- if (!stub->args.mkdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf,
- &stub->args.mkdir_cbk.preparent,
- &stub->args.mkdir_cbk.postparent, stub->xdata);
- else
- stub->args.mkdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf,
- &stub->args.mkdir_cbk.preparent,
- &stub->args.mkdir_cbk.postparent, stub->xdata);
-
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
-
- break;
- }
-
+ stub->fn.mkdir (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.mode,
+ stub->args.umask, stub->args.xdata);
+ break;
case GF_FOP_UNLINK:
- {
- if (!stub->args.unlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno,
- &stub->args.unlink_cbk.preparent,
- &stub->args.unlink_cbk.postparent, stub->xdata);
- else
- stub->args.unlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno,
- &stub->args.unlink_cbk.preparent,
- &stub->args.unlink_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.unlink (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xflag,
+ stub->args.xdata);
+ break;
case GF_FOP_RMDIR:
- {
- if (!stub->args.rmdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno,
- &stub->args.rmdir_cbk.preparent,
- &stub->args.rmdir_cbk.postparent, stub->xdata);
- else
- stub->args.rmdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno,
- &stub->args.rmdir_cbk.preparent,
- &stub->args.rmdir_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.rmdir (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_SYMLINK:
- {
- if (!stub->args.symlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf,
- &stub->args.symlink_cbk.preparent,
- &stub->args.symlink_cbk.postparent, stub->xdata);
- else
- stub->args.symlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf,
- &stub->args.symlink_cbk.preparent,
- &stub->args.symlink_cbk.postparent, stub->xdata);
- }
- break;
-
+ stub->fn.symlink (stub->frame, stub->frame->this,
+ stub->args.linkname, &stub->args.loc,
+ stub->args.umask, stub->args.xdata);
+ break;
case GF_FOP_RENAME:
- {
- if (!stub->args.rename_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf,
- &stub->args.rename_cbk.preoldparent,
- &stub->args.rename_cbk.postoldparent,
- &stub->args.rename_cbk.prenewparent,
- &stub->args.rename_cbk.postnewparent, stub->xdata);
- else
- stub->args.rename_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf,
- &stub->args.rename_cbk.preoldparent,
- &stub->args.rename_cbk.postoldparent,
- &stub->args.rename_cbk.prenewparent,
- &stub->args.rename_cbk.postnewparent, stub->xdata);
- break;
- }
-
+ stub->fn.rename (stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.loc2,
+ stub->args.xdata);
+ break;
case GF_FOP_LINK:
- {
- if (!stub->args.link_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf, stub->xdata);
- else
- stub->args.link_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf,
- &stub->args.link_cbk.preparent,
- &stub->args.link_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.link (stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.loc2,
+ stub->args.xdata);
+ break;
case GF_FOP_TRUNCATE:
- {
- if (!stub->args.truncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.prebuf,
- &stub->args.truncate_cbk.postbuf, stub->xdata);
- else
- stub->args.truncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.prebuf,
- &stub->args.truncate_cbk.postbuf, stub->xdata);
+ stub->fn.truncate (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.offset,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_READ:
- {
- if (!stub->args.readv_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref, stub->xdata);
- else
- stub->args.readv_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref, stub->xdata);
- }
- break;
-
+ stub->fn.readv (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_WRITE:
- {
- if (!stub->args.writev_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.prebuf,
- &stub->args.writev_cbk.postbuf, stub->xdata);
- else
- stub->args.writev_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.prebuf,
- &stub->args.writev_cbk.postbuf, stub->xdata);
+ stub->fn.writev (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.vector,
+ stub->args.count, stub->args.offset,
+ stub->args.flags, stub->args.iobref,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_STATFS:
- {
- if (!stub->args.statfs_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf), stub->xdata);
- else
- stub->args.statfs_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf), stub->xdata);
- }
- break;
-
+ stub->fn.statfs (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
+ break;
case GF_FOP_FLUSH:
- {
- if (!stub->args.flush_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno, stub->xdata);
- else
- stub->args.flush_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno, stub->xdata);
-
+ stub->fn.flush (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.xdata);
break;
- }
-
case GF_FOP_FSYNC:
- {
- if (!stub->args.fsync_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno,
- &stub->args.fsync_cbk.prebuf,
- &stub->args.fsync_cbk.postbuf, stub->xdata);
- else
- stub->args.fsync_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno,
- &stub->args.fsync_cbk.prebuf,
- &stub->args.fsync_cbk.postbuf, stub->xdata);
+ stub->fn.fsync (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.datasync,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_SETXATTR:
- {
- if (!stub->args.setxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno, stub->xdata);
-
- else
- stub->args.setxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno, stub->xdata);
-
+ stub->fn.setxattr (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xattr,
+ stub->args.flags, stub->args.xdata);
break;
- }
-
case GF_FOP_GETXATTR:
- {
- if (!stub->args.getxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict, stub->xdata);
- else
- stub->args.getxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict, stub->xdata);
+ stub->fn.getxattr (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.name,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_FSETXATTR:
- {
- if (!stub->args.fsetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno, stub->xdata);
-
- else
- stub->args.fsetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno, stub->xdata);
-
+ stub->fn.fsetxattr (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.xattr,
+ stub->args.flags, stub->args.xdata);
break;
- }
-
case GF_FOP_FGETXATTR:
- {
- if (!stub->args.fgetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict, stub->xdata);
- else
- stub->args.fgetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict, stub->xdata);
+ stub->fn.fgetxattr (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.name,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_REMOVEXATTR:
- {
- if (!stub->args.removexattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno, stub->xdata);
- else
- stub->args.removexattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno, stub->xdata);
-
+ stub->fn.removexattr (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.name,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_FREMOVEXATTR:
- {
- if (!stub->args.fremovexattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fremovexattr_cbk.op_ret,
- stub->args.fremovexattr_cbk.op_errno, stub->xdata);
- else
- stub->args.fremovexattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fremovexattr_cbk.op_ret,
- stub->args.fremovexattr_cbk.op_errno, stub->xdata);
-
+ stub->fn.fremovexattr (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.name,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_OPENDIR:
- {
- if (!stub->args.opendir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd, stub->xdata);
- else
- stub->args.opendir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd, stub->xdata);
+ stub->fn.opendir (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.fd,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_FSYNCDIR:
- {
- if (!stub->args.fsyncdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno, stub->xdata);
- else
- stub->args.fsyncdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno, stub->xdata);
+ stub->fn.fsyncdir (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.datasync,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_ACCESS:
- {
- if (!stub->args.access_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno, stub->xdata);
- else
- stub->args.access_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno, stub->xdata);
-
+ stub->fn.access (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.mask,
+ stub->args.xdata);
break;
- }
case GF_FOP_FTRUNCATE:
- {
- if (!stub->args.ftruncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.prebuf,
- &stub->args.ftruncate_cbk.postbuf, stub->xdata);
- else
- stub->args.ftruncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.prebuf,
- &stub->args.ftruncate_cbk.postbuf, stub->xdata);
+ stub->fn.ftruncate (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_FSTAT:
- {
- if (!stub->args.fstat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf, stub->xdata);
- else
- stub->args.fstat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf, stub->xdata);
-
+ stub->fn.fstat (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.xdata);
break;
- }
-
case GF_FOP_LK:
- {
- if (!stub->args.lk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock, stub->xdata);
- else
- stub->args.lk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock, stub->xdata);
+ stub->fn.lk (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.cmd,
+ &stub->args.lock, stub->args.xdata);
break;
- }
-
case GF_FOP_INODELK:
- {
- if (!stub->args.inodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.inodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno, stub->xdata);
+ stub->fn.inodelk (stub->frame, stub->frame->this,
+ stub->args.volume, &stub->args.loc,
+ stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_FINODELK:
- {
- if (!stub->args.finodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.finodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno, stub->xdata);
+ stub->fn.finodelk (stub->frame, stub->frame->this,
+ stub->args.volume, stub->args.fd,
+ stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
break;
- }
-
case GF_FOP_ENTRYLK:
- {
- if (!stub->args.entrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.entrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno, stub->xdata);
+ stub->fn.entrylk (stub->frame, stub->frame->this,
+ stub->args.volume, &stub->args.loc,
+ stub->args.name, stub->args.entrylkcmd,
+ stub->args.entrylktype, stub->args.xdata);
break;
- }
-
case GF_FOP_FENTRYLK:
- {
- if (!stub->args.fentrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.fentrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.fentrylk (stub->frame, stub->frame->this,
+ stub->args.volume, stub->args.fd,
+ stub->args.name, stub->args.entrylkcmd,
+ stub->args.entrylktype, stub->args.xdata);
+ break;
case GF_FOP_LOOKUP:
- {
- if (!stub->args.lookup_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->xdata,
- &stub->args.lookup_cbk.postparent);
- else
- stub->args.lookup_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->xdata,
- &stub->args.lookup_cbk.postparent);
- /* FIXME NULL should not be passed */
-
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
-
+ stub->fn.lookup (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
break;
- }
-
case GF_FOP_RCHECKSUM:
- {
- if (!stub->args.rchecksum_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rchecksum_cbk.op_ret,
- stub->args.rchecksum_cbk.op_errno,
- stub->args.rchecksum_cbk.weak_checksum,
- stub->args.rchecksum_cbk.strong_checksum, stub->xdata);
- else
- stub->args.rchecksum_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rchecksum_cbk.op_ret,
- stub->args.rchecksum_cbk.op_errno,
- stub->args.rchecksum_cbk.weak_checksum,
- stub->args.rchecksum_cbk.strong_checksum, stub->xdata);
-
- if (stub->args.rchecksum_cbk.op_ret >= 0)
- GF_FREE (stub->args.rchecksum_cbk.strong_checksum);
-
+ stub->fn.rchecksum (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.size, stub->args.xdata);
break;
- }
-
case GF_FOP_READDIR:
- {
- if (!stub->args.readdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries, stub->xdata);
- else
- stub->args.readdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries, stub->xdata);
-
- if (stub->args.readdir_cbk.op_ret > 0)
- gf_dirent_free (&stub->args.readdir_cbk.entries);
-
+ stub->fn.readdir (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.xdata);
break;
- }
-
case GF_FOP_READDIRP:
- {
- if (!stub->args.readdirp_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readdirp_cbk.op_ret,
- stub->args.readdirp_cbk.op_errno,
- &stub->args.readdirp_cbk.entries, stub->xdata);
- else
- stub->args.readdirp_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readdirp_cbk.op_ret,
- stub->args.readdirp_cbk.op_errno,
- &stub->args.readdirp_cbk.entries, stub->xdata);
-
- if (stub->args.readdirp_cbk.op_ret > 0)
- gf_dirent_free (&stub->args.readdirp_cbk.entries);
-
+ stub->fn.readdirp (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.xdata);
break;
- }
-
case GF_FOP_XATTROP:
- {
- if (!stub->args.xattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno, stub->xdata);
- else
- stub->args.xattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno,
- stub->args.xattrop_cbk.xattr, stub->xdata);
-
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
-
+ stub->fn.xattrop (stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.optype,
+ stub->args.xattr, stub->args.xdata);
break;
- }
case GF_FOP_FXATTROP:
- {
- if (!stub->args.fxattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno, stub->xdata);
- else
- stub->args.fxattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno,
- stub->args.fxattrop_cbk.xattr, stub->xdata);
-
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
-
+ stub->fn.fxattrop (stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.optype,
+ stub->args.xattr, stub->args.xdata);
break;
- }
case GF_FOP_SETATTR:
- {
- if (!stub->args.setattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setattr_cbk.op_ret,
- stub->args.setattr_cbk.op_errno,
- &stub->args.setattr_cbk.statpre,
- &stub->args.setattr_cbk.statpost, stub->xdata);
- else
- stub->args.setattr_cbk.fn (
- stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setattr_cbk.op_ret,
- stub->args.setattr_cbk.op_errno,
- &stub->args.setattr_cbk.statpre,
- &stub->args.setattr_cbk.statpost, stub->xdata);
+ stub->fn.setattr (stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.stat,
+ stub->args.valid, stub->args.xdata);
break;
- }
case GF_FOP_FSETATTR:
- {
- if (!stub->args.fsetattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsetattr_cbk.op_ret,
- stub->args.fsetattr_cbk.op_errno,
- &stub->args.fsetattr_cbk.statpre,
- &stub->args.fsetattr_cbk.statpost, stub->xdata);
- else
- stub->args.fsetattr_cbk.fn (
- stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsetattr_cbk.op_ret,
- stub->args.fsetattr_cbk.op_errno,
- &stub->args.fsetattr_cbk.statpre,
- &stub->args.fsetattr_cbk.statpost, stub->xdata);
+ stub->fn.fsetattr (stub->frame, stub->frame->this,
+ stub->args.fd, &stub->args.stat,
+ stub->args.valid, stub->args.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ stub->fn.fallocate(stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.flags,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ stub->fn.discard(stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.size, stub->args.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ stub->fn.zerofill(stub->frame, stub->frame->this,
+ stub->args.fd, stub->args.offset,
+ stub->args.size, stub->args.xdata);
break;
- }
+
default:
- {
gf_log_callingfn ("call-stub", GF_LOG_ERROR,
"Invalid value of FOP (%d)",
stub->fop);
break;
}
- }
out:
return;
}
+#define STUB_UNWIND(stb, fop, args ...) do { \
+ if (stb->fn_cbk.fop) \
+ stb->fn_cbk.fop (stb->frame, stb->frame->cookie, \
+ stb->frame->this, stb->args_cbk.op_ret, \
+ stb->args_cbk.op_errno, args); \
+ else \
+ STACK_UNWIND_STRICT (fop, stb->frame, stb->args_cbk.op_ret, \
+ stb->args_cbk.op_errno, args); \
+ } while (0)
+
+
static void
-call_stub_destroy_wind (call_stub_t *stub)
+call_resume_unwind (call_stub_t *stub)
{
- if (stub->xdata)
- dict_unref (stub->xdata);
+ GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
switch (stub->fop) {
case GF_FOP_OPEN:
- {
- loc_wipe (&stub->args.open.loc);
- if (stub->args.open.fd)
- fd_unref (stub->args.open.fd);
+ STUB_UNWIND (stub, open, stub->args_cbk.fd,
+ stub->args_cbk.xdata);
break;
- }
case GF_FOP_CREATE:
- {
- loc_wipe (&stub->args.create.loc);
- if (stub->args.create.fd)
- fd_unref (stub->args.create.fd);
+ STUB_UNWIND (stub, create, stub->args_cbk.fd,
+ stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
break;
- }
case GF_FOP_STAT:
- {
- loc_wipe (&stub->args.stat.loc);
+ STUB_UNWIND (stub, stat, &stub->args_cbk.stat,
+ stub->args_cbk.xdata);
break;
- }
case GF_FOP_READLINK:
- {
- loc_wipe (&stub->args.readlink.loc);
- break;
- }
-
+ STUB_UNWIND (stub, readlink, stub->args_cbk.buf,
+ &stub->args_cbk.stat, stub->args_cbk.xdata);
+ break;
case GF_FOP_MKNOD:
- {
- loc_wipe (&stub->args.mknod.loc);
- }
- break;
-
+ STUB_UNWIND (stub, mknod, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_MKDIR:
- {
- loc_wipe (&stub->args.mkdir.loc);
- }
- break;
-
+ STUB_UNWIND (stub, mkdir, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_UNLINK:
- {
- loc_wipe (&stub->args.unlink.loc);
- }
- break;
-
+ STUB_UNWIND (stub, unlink, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_RMDIR:
- {
- loc_wipe (&stub->args.rmdir.loc);
- }
- break;
-
+ STUB_UNWIND (stub, rmdir, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_SYMLINK:
- {
- GF_FREE ((char *)stub->args.symlink.linkname);
- loc_wipe (&stub->args.symlink.loc);
- }
- break;
-
+ STUB_UNWIND (stub, symlink, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_RENAME:
- {
- loc_wipe (&stub->args.rename.old);
- loc_wipe (&stub->args.rename.new);
- }
- break;
-
+ STUB_UNWIND (stub, rename, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent,
+ &stub->args_cbk.preparent2,
+ &stub->args_cbk.postparent2,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_LINK:
- {
- loc_wipe (&stub->args.link.oldloc);
- loc_wipe (&stub->args.link.newloc);
- }
- break;
-
+ STUB_UNWIND (stub, link, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_TRUNCATE:
- {
- loc_wipe (&stub->args.truncate.loc);
- break;
- }
-
+ STUB_UNWIND (stub, truncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_READ:
- {
- if (stub->args.readv.fd)
- fd_unref (stub->args.readv.fd);
- break;
- }
-
+ STUB_UNWIND (stub, readv, stub->args_cbk.vector,
+ stub->args_cbk.count, &stub->args_cbk.stat,
+ stub->args_cbk.iobref, stub->args_cbk.xdata);
+ break;
case GF_FOP_WRITE:
- {
- struct iobref *iobref = stub->args.writev.iobref;
- if (stub->args.writev.fd)
- fd_unref (stub->args.writev.fd);
- GF_FREE (stub->args.writev.vector);
- if (iobref)
- iobref_unref (iobref);
- break;
- }
-
+ STUB_UNWIND (stub, writev, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_STATFS:
- {
- loc_wipe (&stub->args.statfs.loc);
- break;
- }
+ STUB_UNWIND (stub, statfs, &stub->args_cbk.statvfs,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FLUSH:
- {
- if (stub->args.flush.fd)
- fd_unref (stub->args.flush.fd);
- break;
- }
-
+ STUB_UNWIND (stub, flush, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSYNC:
- {
- if (stub->args.fsync.fd)
- fd_unref (stub->args.fsync.fd);
- break;
- }
-
+ STUB_UNWIND (stub, fsync, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_SETXATTR:
- {
- loc_wipe (&stub->args.setxattr.loc);
- if (stub->args.setxattr.dict)
- dict_unref (stub->args.setxattr.dict);
- break;
- }
-
+ STUB_UNWIND (stub, setxattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr.name)
- GF_FREE ((char *)stub->args.getxattr.name);
- loc_wipe (&stub->args.getxattr.loc);
- break;
- }
-
+ STUB_UNWIND (stub, getxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FSETXATTR:
- {
- fd_unref (stub->args.fsetxattr.fd);
- if (stub->args.fsetxattr.dict)
- dict_unref (stub->args.fsetxattr.dict);
- break;
- }
-
+ STUB_UNWIND (stub, fsetxattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr.name)
- GF_FREE ((char *)stub->args.fgetxattr.name);
- fd_unref (stub->args.fgetxattr.fd);
- break;
- }
-
+ STUB_UNWIND (stub, fgetxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_REMOVEXATTR:
- {
- loc_wipe (&stub->args.removexattr.loc);
- GF_FREE ((char *)stub->args.removexattr.name);
- break;
- }
-
+ STUB_UNWIND (stub, removexattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_FREMOVEXATTR:
- {
- fd_unref (stub->args.fremovexattr.fd);
- GF_FREE ((char *)stub->args.fremovexattr.name);
- break;
- }
-
+ STUB_UNWIND (stub, fremovexattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_OPENDIR:
- {
- loc_wipe (&stub->args.opendir.loc);
- if (stub->args.opendir.fd)
- fd_unref (stub->args.opendir.fd);
- break;
- }
-
+ STUB_UNWIND (stub, opendir, stub->args_cbk.fd,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FSYNCDIR:
- {
- if (stub->args.fsyncdir.fd)
- fd_unref (stub->args.fsyncdir.fd);
+ STUB_UNWIND (stub, fsyncdir, stub->args_cbk.xdata);
break;
- }
-
case GF_FOP_ACCESS:
- {
- loc_wipe (&stub->args.access.loc);
+ STUB_UNWIND (stub, access, stub->args_cbk.xdata);
break;
- }
-
case GF_FOP_FTRUNCATE:
- {
- if (stub->args.ftruncate.fd)
- fd_unref (stub->args.ftruncate.fd);
+ STUB_UNWIND (stub, ftruncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
break;
- }
-
case GF_FOP_FSTAT:
- {
- if (stub->args.fstat.fd)
- fd_unref (stub->args.fstat.fd);
+ STUB_UNWIND (stub, fstat, &stub->args_cbk.stat,
+ stub->args_cbk.xdata);
break;
- }
-
case GF_FOP_LK:
- {
- if (stub->args.lk.fd)
- fd_unref (stub->args.lk.fd);
- break;
- }
-
+ STUB_UNWIND (stub, lk, &stub->args_cbk.lock,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_INODELK:
- {
- if (stub->args.inodelk.volume)
- GF_FREE ((char *)stub->args.inodelk.volume);
-
- loc_wipe (&stub->args.inodelk.loc);
+ STUB_UNWIND (stub, inodelk, stub->args_cbk.xdata);
break;
- }
case GF_FOP_FINODELK:
- {
- if (stub->args.finodelk.volume)
- GF_FREE ((char *)stub->args.finodelk.volume);
-
- if (stub->args.finodelk.fd)
- fd_unref (stub->args.finodelk.fd);
+ STUB_UNWIND (stub, finodelk, stub->args_cbk.xdata);
break;
- }
case GF_FOP_ENTRYLK:
- {
- if (stub->args.entrylk.volume)
- GF_FREE ((char *)stub->args.entrylk.volume);
-
- if (stub->args.entrylk.name)
- GF_FREE ((char *)stub->args.entrylk.name);
- loc_wipe (&stub->args.entrylk.loc);
- break;
- }
+ STUB_UNWIND (stub, entrylk, stub->args_cbk.xdata);
+ break;
case GF_FOP_FENTRYLK:
- {
- if (stub->args.fentrylk.volume)
- GF_FREE ((char *)stub->args.fentrylk.volume);
-
- if (stub->args.fentrylk.name)
- GF_FREE ((char *)stub->args.fentrylk.name);
-
- if (stub->args.fentrylk.fd)
- fd_unref (stub->args.fentrylk.fd);
+ STUB_UNWIND (stub, fentrylk, stub->args_cbk.xdata);
break;
- }
-
case GF_FOP_LOOKUP:
- {
- loc_wipe (&stub->args.lookup.loc);
+ STUB_UNWIND (stub, lookup, stub->args_cbk.inode,
+ &stub->args_cbk.stat, stub->args_cbk.xdata,
+ &stub->args_cbk.postparent);
break;
- }
-
case GF_FOP_RCHECKSUM:
- {
- if (stub->args.rchecksum.fd)
- fd_unref (stub->args.rchecksum.fd);
- break;
- }
-
+ STUB_UNWIND (stub, rchecksum, stub->args_cbk.weak_checksum,
+ stub->args_cbk.strong_checksum, stub->args_cbk.xdata);
+ break;
case GF_FOP_READDIR:
- {
- if (stub->args.readdir.fd)
- fd_unref (stub->args.readdir.fd);
- break;
- }
-
+ STUB_UNWIND (stub, readdir, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_READDIRP:
- {
- if (stub->args.readdirp.fd)
- fd_unref (stub->args.readdirp.fd);
-
+ STUB_UNWIND (stub, readdirp, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
break;
- }
-
case GF_FOP_XATTROP:
- {
- loc_wipe (&stub->args.xattrop.loc);
- dict_unref (stub->args.xattrop.xattr);
- break;
- }
+ STUB_UNWIND (stub, xattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop.fd)
- fd_unref (stub->args.fxattrop.fd);
- dict_unref (stub->args.fxattrop.xattr);
- break;
- }
+ STUB_UNWIND (stub, fxattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_SETATTR:
- {
- loc_wipe (&stub->args.setattr.loc);
+ STUB_UNWIND (stub, setattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
break;
- }
case GF_FOP_FSETATTR:
- {
- if (stub->args.fsetattr.fd)
- fd_unref (stub->args.fsetattr.fd);
+ STUB_UNWIND (stub, fsetattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STUB_UNWIND(stub, fallocate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ STUB_UNWIND(stub, discard, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ STUB_UNWIND(stub, zerofill, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
break;
- }
+
default:
- {
gf_log_callingfn ("call-stub", GF_LOG_ERROR,
"Invalid value of FOP (%d)",
stub->fop);
break;
}
- }
+out:
+ return;
}
static void
-call_stub_destroy_unwind (call_stub_t *stub)
+call_stub_wipe_args (call_stub_t *stub)
{
- if (stub->xdata)
- dict_unref (stub->xdata);
+ loc_wipe (&stub->args.loc);
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- if (stub->args.open_cbk.fd)
- fd_unref (stub->args.open_cbk.fd);
- }
- break;
-
- case GF_FOP_CREATE:
- {
- if (stub->args.create_cbk.fd)
- fd_unref (stub->args.create_cbk.fd);
-
- if (stub->args.create_cbk.inode)
- inode_unref (stub->args.create_cbk.inode);
- }
- break;
-
- case GF_FOP_STAT:
- break;
-
- case GF_FOP_READLINK:
- {
- if (stub->args.readlink_cbk.buf)
- GF_FREE ((char *)stub->args.readlink_cbk.buf);
- }
- break;
-
- case GF_FOP_MKNOD:
- {
- if (stub->args.mknod_cbk.inode)
- inode_unref (stub->args.mknod_cbk.inode);
- }
- break;
-
- case GF_FOP_MKDIR:
- {
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
- }
- break;
-
- case GF_FOP_UNLINK:
- break;
-
- case GF_FOP_RMDIR:
- break;
+ loc_wipe (&stub->args.loc2);
- case GF_FOP_SYMLINK:
- {
- if (stub->args.symlink_cbk.inode)
- inode_unref (stub->args.symlink_cbk.inode);
- }
- break;
+ if (stub->args.fd)
+ fd_unref (stub->args.fd);
- case GF_FOP_RENAME:
- break;
+ GF_FREE ((char *)stub->args.linkname);
- case GF_FOP_LINK:
- {
- if (stub->args.link_cbk.inode)
- inode_unref (stub->args.link_cbk.inode);
- }
- break;
+ GF_FREE (stub->args.vector);
- case GF_FOP_TRUNCATE:
- break;
+ if (stub->args.iobref)
+ iobref_unref (stub->args.iobref);
- case GF_FOP_READ:
- {
- if (stub->args.readv_cbk.op_ret >= 0) {
- struct iobref *iobref = stub->args.readv_cbk.iobref;
- GF_FREE (stub->args.readv_cbk.vector);
+ if (stub->args.xattr)
+ dict_unref (stub->args.xattr);
- if (iobref) {
- iobref_unref (iobref);
- }
- }
- }
- break;
+ GF_FREE ((char *)stub->args.name);
- case GF_FOP_WRITE:
- break;
+ GF_FREE ((char *)stub->args.volume);
- case GF_FOP_STATFS:
- break;
-
- case GF_FOP_FLUSH:
- break;
-
- case GF_FOP_FSYNC:
- break;
-
- case GF_FOP_SETXATTR:
- break;
-
- case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr_cbk.dict)
- dict_unref (stub->args.getxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_FSETXATTR:
- break;
-
- case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr_cbk.dict)
- dict_unref (stub->args.fgetxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_REMOVEXATTR:
- break;
- case GF_FOP_FREMOVEXATTR:
- break;
-
- case GF_FOP_OPENDIR:
- {
- if (stub->args.opendir_cbk.fd)
- fd_unref (stub->args.opendir_cbk.fd);
- }
- break;
-
- case GF_FOP_FSYNCDIR:
- break;
-
- case GF_FOP_ACCESS:
- break;
-
- case GF_FOP_FTRUNCATE:
- break;
-
- case GF_FOP_FSTAT:
- break;
-
- case GF_FOP_LK:
- break;
-
- case GF_FOP_INODELK:
- break;
-
- case GF_FOP_FINODELK:
- break;
+ if (stub->args.xdata)
+ dict_unref (stub->args.xdata);
+}
- case GF_FOP_ENTRYLK:
- break;
- case GF_FOP_FENTRYLK:
- break;
-
- case GF_FOP_LOOKUP:
- {
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
- }
- break;
+static void
+call_stub_wipe_args_cbk (call_stub_t *stub)
+{
+ if (stub->args_cbk.inode)
+ inode_unref (stub->args_cbk.inode);
- case GF_FOP_RCHECKSUM:
- {
- if (stub->args.rchecksum_cbk.op_ret >= 0) {
- GF_FREE (stub->args.rchecksum_cbk.strong_checksum);
- }
- }
- break;
+ GF_FREE ((char *)stub->args_cbk.buf);
- case GF_FOP_READDIR:
- {
- if (stub->args.readdir_cbk.op_ret > 0) {
- gf_dirent_free (&stub->args.readdir_cbk.entries);
- }
- }
- break;
+ GF_FREE (stub->args_cbk.vector);
- case GF_FOP_READDIRP:
- {
- if (stub->args.readdirp_cbk.op_ret > 0) {
- gf_dirent_free (&stub->args.readdirp_cbk.entries);
- }
- }
- break;
+ if (stub->args_cbk.iobref)
+ iobref_unref (stub->args_cbk.iobref);
- case GF_FOP_XATTROP:
- {
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
- }
- break;
+ if (stub->args_cbk.fd)
+ fd_unref (stub->args_cbk.fd);
- case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
- }
- break;
+ if (stub->args_cbk.xattr)
+ dict_unref (stub->args_cbk.xattr);
- case GF_FOP_SETATTR:
- {
- break;
- }
+ GF_FREE (stub->args_cbk.strong_checksum);
- case GF_FOP_FSETATTR:
- {
- break;
- }
+ if (stub->args_cbk.xdata)
+ dict_unref (stub->args_cbk.xdata);
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
- }
+ if (!list_empty (&stub->args_cbk.entries.list))
+ gf_dirent_free (&stub->args_cbk.entries);
}
@@ -4122,18 +2811,19 @@ call_stub_destroy (call_stub_t *stub)
{
GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
- if (stub->wind) {
- call_stub_destroy_wind (stub);
- } else {
- call_stub_destroy_unwind (stub);
- }
+ if (stub->wind)
+ call_stub_wipe_args (stub);
+ else
+ call_stub_wipe_args_cbk (stub);
stub->stub_mem_pool = NULL;
+
mem_put (stub);
out:
return;
}
+
void
call_resume (call_stub_t *stub)
{
@@ -4158,3 +2848,26 @@ call_resume (call_stub_t *stub)
out:
return;
}
+
+
+/* Detach a stub from its list, unwind it with op_ret/op_errno, destroy it. */
+void
+call_unwind_error (call_stub_t *stub, int op_ret, int op_errno)
+{
+        xlator_t *old_THIS = NULL;
+
+        GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+        list_del_init (&stub->list);
+
+        /* run the unwind with THIS set to the xlator of the stub's frame */
+        old_THIS = THIS;
+        THIS = stub->frame->this;
+        stub->args_cbk.op_ret   = op_ret;
+        stub->args_cbk.op_errno = op_errno;
+        call_resume_unwind (stub);
+        THIS = old_THIS;
+
+        call_stub_destroy (stub);
+out:
+        return;
+}
diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h
index 633fc4cbb..45bef8044 100644
--- a/libglusterfs/src/call-stub.h
+++ b/libglusterfs/src/call-stub.h
@@ -26,568 +26,158 @@ typedef struct {
call_frame_t *frame;
glusterfs_fop_t fop;
struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
- dict_t *xdata; /* common accross all the fops */
union {
- /* lookup */
- struct {
- fop_lookup_t fn;
- loc_t loc;
- } lookup;
- struct {
- fop_lookup_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt postparent;
- } lookup_cbk;
-
- /* stat */
- struct {
- fop_stat_t fn;
- loc_t loc;
- } stat;
- struct {
- fop_stat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- } stat_cbk;
-
- /* fstat */
- struct {
- fop_fstat_t fn;
- fd_t *fd;
- } fstat;
- struct {
- fop_fstat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- } fstat_cbk;
-
- /* truncate */
- struct {
- fop_truncate_t fn;
- loc_t loc;
- off_t off;
- } truncate;
- struct {
- fop_truncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } truncate_cbk;
-
- /* ftruncate */
- struct {
- fop_ftruncate_t fn;
- fd_t *fd;
- off_t off;
- } ftruncate;
- struct {
- fop_ftruncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } ftruncate_cbk;
-
- /* access */
- struct {
- fop_access_t fn;
- loc_t loc;
- int32_t mask;
- } access;
- struct {
- fop_access_cbk_t fn;
- int32_t op_ret, op_errno;
- } access_cbk;
-
- /* readlink */
- struct {
- fop_readlink_t fn;
- loc_t loc;
- size_t size;
- } readlink;
- struct {
- fop_readlink_cbk_t fn;
- int32_t op_ret, op_errno;
- const char *buf;
- struct iatt sbuf;
- } readlink_cbk;
-
- /* mknod */
- struct {
- fop_mknod_t fn;
- loc_t loc;
- mode_t mode;
- dev_t rdev;
- mode_t umask;
- } mknod;
- struct {
- fop_mknod_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } mknod_cbk;
-
- /* mkdir */
- struct {
- fop_mkdir_t fn;
- loc_t loc;
- mode_t mode;
- mode_t umask;
- } mkdir;
- struct {
- fop_mkdir_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } mkdir_cbk;
-
- /* unlink */
- struct {
- fop_unlink_t fn;
- loc_t loc;
- int xflag;
- } unlink;
- struct {
- fop_unlink_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink_cbk;
-
- /* rmdir */
- struct {
- fop_rmdir_t fn;
- loc_t loc;
- int flags;
- } rmdir;
- struct {
- fop_rmdir_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } rmdir_cbk;
-
- /* symlink */
- struct {
- fop_symlink_t fn;
- const char *linkname;
- loc_t loc;
- mode_t umask;
- } symlink;
- struct {
- fop_symlink_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } symlink_cbk;
-
- /* rename */
- struct {
- fop_rename_t fn;
- loc_t old;
- loc_t new;
- } rename;
- struct {
- fop_rename_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- struct iatt preoldparent;
- struct iatt postoldparent;
- struct iatt prenewparent;
- struct iatt postnewparent;
- } rename_cbk;
-
- /* link */
- struct {
- fop_link_t fn;
- loc_t oldloc;
- loc_t newloc;
- } link;
- struct {
- fop_link_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } link_cbk;
-
- /* create */
- struct {
- fop_create_t fn;
- loc_t loc;
- int32_t flags;
- mode_t mode;
- fd_t *fd;
- mode_t umask;
- } create;
- struct {
- fop_create_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } create_cbk;
-
- /* open */
- struct {
- fop_open_t fn;
- loc_t loc;
- int32_t flags;
- fd_t *fd;
- } open;
- struct {
- fop_open_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } open_cbk;
-
- /* readv */
- struct {
- fop_readv_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- uint32_t flags;
- } readv;
- struct {
- fop_readv_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iovec *vector;
- int32_t count;
- struct iatt stbuf;
- struct iobref *iobref;
- } readv_cbk;
-
- /* writev */
- struct {
- fop_writev_t fn;
- fd_t *fd;
- struct iovec *vector;
- int32_t count;
- off_t off;
- uint32_t flags;
- struct iobref *iobref;
- } writev;
- struct {
- fop_writev_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } writev_cbk;
-
- /* flush */
- struct {
- fop_flush_t fn;
- fd_t *fd;
- } flush;
- struct {
- fop_flush_cbk_t fn;
- int32_t op_ret, op_errno;
- } flush_cbk;
-
- /* fsync */
- struct {
- fop_fsync_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsync;
- struct {
- fop_fsync_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync_cbk;
-
- /* opendir */
- struct {
- fop_opendir_t fn;
- loc_t loc;
- fd_t *fd;
- } opendir;
- struct {
- fop_opendir_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } opendir_cbk;
-
-
- /* fsyncdir */
- struct {
- fop_fsyncdir_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsyncdir;
- struct {
- fop_fsyncdir_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsyncdir_cbk;
-
- /* statfs */
- struct {
- fop_statfs_t fn;
- loc_t loc;
- } statfs;
- struct {
- fop_statfs_cbk_t fn;
- int32_t op_ret, op_errno;
- struct statvfs buf;
- } statfs_cbk;
-
- /* setxattr */
- struct {
- fop_setxattr_t fn;
- loc_t loc;
- dict_t *dict;
- int32_t flags;
- } setxattr;
- struct {
- fop_setxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } setxattr_cbk;
-
- /* getxattr */
- struct {
- fop_getxattr_t fn;
- loc_t loc;
- const char *name;
- } getxattr;
- struct {
- fop_getxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } getxattr_cbk;
-
- /* fsetxattr */
- struct {
- fop_fsetxattr_t fn;
- fd_t *fd;
- dict_t *dict;
- int32_t flags;
- } fsetxattr;
- struct {
- fop_fsetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsetxattr_cbk;
-
- /* fgetxattr */
- struct {
- fop_fgetxattr_t fn;
- fd_t *fd;
- const char *name;
- } fgetxattr;
- struct {
- fop_fgetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } fgetxattr_cbk;
-
- /* removexattr */
- struct {
- fop_removexattr_t fn;
- loc_t loc;
- const char *name;
- } removexattr;
- struct {
- fop_removexattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } removexattr_cbk;
-
-
- /* fremovexattr */
- struct {
- fop_fremovexattr_t fn;
- fd_t *fd;
- const char *name;
- } fremovexattr;
- struct {
- fop_fremovexattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } fremovexattr_cbk;
-
- /* lk */
- struct {
- fop_lk_t fn;
- fd_t *fd;
- int32_t cmd;
- struct gf_flock lock;
- } lk;
- struct {
- fop_lk_cbk_t fn;
- int32_t op_ret, op_errno;
- struct gf_flock lock;
- } lk_cbk;
-
- /* inodelk */
- struct {
- fop_inodelk_t fn;
- const char *volume;
- loc_t loc;
- int32_t cmd;
- struct gf_flock lock;
- } inodelk;
-
- struct {
- fop_inodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } inodelk_cbk;
-
- /* finodelk */
- struct {
- fop_finodelk_t fn;
- const char *volume;
- fd_t *fd;
- int32_t cmd;
- struct gf_flock lock;
- } finodelk;
-
- struct {
- fop_finodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } finodelk_cbk;
-
- /* entrylk */
- struct {
- fop_entrylk_t fn;
- loc_t loc;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } entrylk;
-
- struct {
- fop_entrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } entrylk_cbk;
-
- /* fentrylk */
- struct {
- fop_fentrylk_t fn;
- fd_t *fd;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } fentrylk;
-
- struct {
- fop_fentrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } fentrylk_cbk;
-
- /* readdir */
- struct {
- fop_readdir_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readdir;
- struct {
- fop_readdir_cbk_t fn;
- int32_t op_ret, op_errno;
- gf_dirent_t entries;
- } readdir_cbk;
-
- /* readdirp */
- struct {
- fop_readdirp_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readdirp;
- struct {
- fop_readdirp_cbk_t fn;
- int32_t op_ret, op_errno;
- gf_dirent_t entries;
- } readdirp_cbk;
-
- /* rchecksum */
- struct {
- fop_rchecksum_t fn;
- fd_t *fd;
- off_t offset;
- int32_t len;
- } rchecksum;
- struct {
- fop_rchecksum_cbk_t fn;
- int32_t op_ret, op_errno;
- uint32_t weak_checksum;
- uint8_t *strong_checksum;
- } rchecksum_cbk;
-
- /* xattrop */
- struct {
- fop_xattrop_t fn;
- loc_t loc;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } xattrop;
- struct {
- fop_xattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } xattrop_cbk;
-
- /* fxattrop */
- struct {
- fop_fxattrop_t fn;
- fd_t *fd;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } fxattrop;
- struct {
- fop_fxattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } fxattrop_cbk;
-
- /* setattr */
- struct {
- fop_setattr_t fn;
- loc_t loc;
- struct iatt stbuf;
- int32_t valid;
- } setattr;
- struct {
- fop_setattr_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt statpre;
- struct iatt statpost;
- } setattr_cbk;
-
- /* fsetattr */
- struct {
- fop_fsetattr_t fn;
- fd_t *fd;
- struct iatt stbuf;
- int32_t valid;
- } fsetattr;
- struct {
- fop_fsetattr_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt statpre;
- struct iatt statpost;
- } fsetattr_cbk;
+ fop_lookup_t lookup;
+ fop_stat_t stat;
+ fop_fstat_t fstat;
+ fop_truncate_t truncate;
+ fop_ftruncate_t ftruncate;
+ fop_access_t access;
+ fop_readlink_t readlink;
+ fop_mknod_t mknod;
+ fop_mkdir_t mkdir;
+ fop_unlink_t unlink;
+ fop_rmdir_t rmdir;
+ fop_symlink_t symlink;
+ fop_rename_t rename;
+ fop_link_t link;
+ fop_create_t create;
+ fop_open_t open;
+ fop_readv_t readv;
+ fop_writev_t writev;
+ fop_flush_t flush;
+ fop_fsync_t fsync;
+ fop_opendir_t opendir;
+ fop_fsyncdir_t fsyncdir;
+ fop_statfs_t statfs;
+ fop_setxattr_t setxattr;
+ fop_getxattr_t getxattr;
+ fop_fgetxattr_t fgetxattr;
+ fop_fsetxattr_t fsetxattr;
+ fop_removexattr_t removexattr;
+ fop_fremovexattr_t fremovexattr;
+ fop_lk_t lk;
+ fop_inodelk_t inodelk;
+ fop_finodelk_t finodelk;
+ fop_entrylk_t entrylk;
+ fop_fentrylk_t fentrylk;
+ fop_readdir_t readdir;
+ fop_readdirp_t readdirp;
+ fop_rchecksum_t rchecksum;
+ fop_xattrop_t xattrop;
+ fop_fxattrop_t fxattrop;
+ fop_setattr_t setattr;
+ fop_fsetattr_t fsetattr;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
+ } fn;
+ union {
+ fop_lookup_cbk_t lookup;
+ fop_stat_cbk_t stat;
+ fop_fstat_cbk_t fstat;
+ fop_truncate_cbk_t truncate;
+ fop_ftruncate_cbk_t ftruncate;
+ fop_access_cbk_t access;
+ fop_readlink_cbk_t readlink;
+ fop_mknod_cbk_t mknod;
+ fop_mkdir_cbk_t mkdir;
+ fop_unlink_cbk_t unlink;
+ fop_rmdir_cbk_t rmdir;
+ fop_symlink_cbk_t symlink;
+ fop_rename_cbk_t rename;
+ fop_link_cbk_t link;
+ fop_create_cbk_t create;
+ fop_open_cbk_t open;
+ fop_readv_cbk_t readv;
+ fop_writev_cbk_t writev;
+ fop_flush_cbk_t flush;
+ fop_fsync_cbk_t fsync;
+ fop_opendir_cbk_t opendir;
+ fop_fsyncdir_cbk_t fsyncdir;
+ fop_statfs_cbk_t statfs;
+ fop_setxattr_cbk_t setxattr;
+ fop_getxattr_cbk_t getxattr;
+ fop_fgetxattr_cbk_t fgetxattr;
+ fop_fsetxattr_cbk_t fsetxattr;
+ fop_removexattr_cbk_t removexattr;
+ fop_fremovexattr_cbk_t fremovexattr;
+ fop_lk_cbk_t lk;
+ fop_inodelk_cbk_t inodelk;
+ fop_finodelk_cbk_t finodelk;
+ fop_entrylk_cbk_t entrylk;
+ fop_fentrylk_cbk_t fentrylk;
+ fop_readdir_cbk_t readdir;
+ fop_readdirp_cbk_t readdirp;
+ fop_rchecksum_cbk_t rchecksum;
+ fop_xattrop_cbk_t xattrop;
+ fop_fxattrop_cbk_t fxattrop;
+ fop_setattr_cbk_t setattr;
+ fop_fsetattr_cbk_t fsetattr;
+ fop_fallocate_cbk_t fallocate;
+ fop_discard_cbk_t discard;
+ fop_zerofill_cbk_t zerofill;
+ } fn_cbk;
+
+ struct {
+ loc_t loc; // @old in rename(), link()
+ loc_t loc2; // @new in rename(), link()
+ fd_t *fd;
+ off_t offset;
+ int mask;
+ size_t size;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+ int xflag;
+ int flags;
+ const char *linkname;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ int datasync;
+ dict_t *xattr;
+ const char *name;
+ int cmd;
+ struct gf_flock lock;
+ const char *volume;
+ entrylk_cmd entrylkcmd;
+ entrylk_type entrylktype;
+ gf_xattrop_flags_t optype;
+ int valid;
+ struct iatt stat;
+ dict_t *xdata;
} args;
+
+ struct {
+ int op_ret;
+ int op_errno;
+ inode_t *inode;
+ struct iatt stat;
+ struct iatt prestat;
+ struct iatt poststat;
+ struct iatt preparent; // @preoldparent in rename_cbk
+ struct iatt postparent; // @postoldparent in rename_cbk
+ struct iatt preparent2; // @prenewparent in rename_cbk
+ struct iatt postparent2; // @postnewparent in rename_cbk
+ const char *buf;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ fd_t *fd;
+ struct statvfs statvfs;
+ dict_t *xattr;
+ struct gf_flock lock;
+ gf_dirent_t entries;
+ uint32_t weak_checksum;
+ uint8_t *strong_checksum;
+ dict_t *xdata;
+ } args_cbk;
} call_stub_t;
+
call_stub_t *
fop_lookup_stub (call_frame_t *frame,
fop_lookup_t fn,
@@ -1129,6 +719,49 @@ fop_fsetattr_cbk_stub (call_frame_t *frame,
struct iatt *statpre,
struct iatt *statpost, dict_t *xdata);
+call_stub_t *
+fop_fallocate_stub(call_frame_t *frame,
+ fop_fallocate_t fn,
+ fd_t *fd,
+ int32_t mode, off_t offset,
+ size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_fallocate_cbk_stub(call_frame_t *frame,
+ fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
+call_stub_t *
+fop_discard_stub(call_frame_t *frame,
+ fop_discard_t fn,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_discard_cbk_stub(call_frame_t *frame,
+ fop_discard_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
+call_stub_t *
+fop_zerofill_stub(call_frame_t *frame,
+ fop_zerofill_t fn,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_zerofill_cbk_stub(call_frame_t *frame,
+ fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
void call_resume (call_stub_t *stub);
void call_stub_destroy (call_stub_t *stub);
+void call_unwind_error (call_stub_t *stub, int op_ret, int op_errno);
#endif
diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c
index 6c7907a09..484ce7dc9 100644
--- a/libglusterfs/src/circ-buff.c
+++ b/libglusterfs/src/circ-buff.c
@@ -10,6 +10,17 @@
#include "circ-buff.h"
+void
+cb_destroy_data (circular_buffer_t *cb,
+                 void (*destroy_buffer_data) (void *data))
+{
+        /* run the caller-supplied hook on the payload before freeing it */
+        if (destroy_buffer_data != NULL)
+                destroy_buffer_data (cb->data);
+        GF_FREE (cb->data);
+}
+
+
/* hold lock while calling this function */
int
__cb_add_entry_buffer (buffer_t *buffer, void *item)
@@ -29,7 +40,8 @@ __cb_add_entry_buffer (buffer_t *buffer, void *item)
if (buffer->cb[buffer->w_index]) {
ptr = buffer->cb[buffer->w_index];
if (ptr->data) {
- GF_FREE (ptr->data);
+ cb_destroy_data (ptr,
+ buffer->destroy_buffer_data);
ptr->data = NULL;
GF_FREE (ptr);
}
@@ -50,7 +62,7 @@ __cb_add_entry_buffer (buffer_t *buffer, void *item)
gf_log_callingfn ("", GF_LOG_WARNING, "getting time of"
"the day failed");
buffer->w_index++;
- buffer->w_index %= buffer->size_buffer - 1;
+ buffer->w_index %= buffer->size_buffer;
//used_buffer size cannot be greater than the total buffer size
if (buffer->used_len < buffer->size_buffer)
@@ -90,22 +102,35 @@ void
cb_buffer_dump (buffer_t *buffer, void *data,
int (fn) (circular_buffer_t *buffer, void *data))
{
- int i = 0;
+ int index = 0;
circular_buffer_t *entry = NULL;
int entries = 0;
+ int ul = 0;
+ int w_ind = 0;
+ int size_buff = 0;
+ int i = 0;
+
pthread_mutex_lock (&buffer->lock);
{
+ ul = buffer->used_len; /* snapshot the ring state under the lock */
+ w_ind = buffer->w_index;
+ size_buff = buffer->size_buffer;
if (buffer->use_once == _gf_false) {
- for (i = (buffer->w_index - 1) ; entries <
- buffer->used_len ; entries++) {
- entry = buffer->cb[i];
+ index = (size_buff + (w_ind - ul))%size_buff;
+ for (entries = 0; entries < buffer->used_len;
+ entries++) {
+ entry = buffer->cb[index];
if (entry)
fn (entry, data);
- if (0 == i)
- i = buffer->used_len - 1;
else
- i = (i - 1) % (buffer->used_len - 1);
+ gf_log_callingfn ("", GF_LOG_WARNING,
+ "Null entry in "
+ "circular buffer at "
+ "index %d.", index);
+
+ index++;
+ index %= buffer->size_buffer;
}
} else {
for (i = 0; i < buffer->used_len ; i++) {
@@ -118,7 +143,8 @@ cb_buffer_dump (buffer_t *buffer, void *data,
}
buffer_t *
-cb_buffer_new (size_t buffer_size, gf_boolean_t use_once)
+cb_buffer_new (size_t buffer_size, gf_boolean_t use_once,
+ void (*destroy_buffer_data) (void *data))
{
buffer_t *buffer = NULL;
@@ -144,6 +170,7 @@ cb_buffer_new (size_t buffer_size, gf_boolean_t use_once)
buffer->size_buffer = buffer_size;
buffer->use_once = use_once;
buffer->used_len = 0;
+ buffer->destroy_buffer_data = destroy_buffer_data;
pthread_mutex_init (&buffer->lock, NULL);
out:
@@ -153,13 +180,18 @@ out:
void
cb_buffer_destroy (buffer_t *buffer)
{
- int i = 0;
-
+ int i = 0;
+ circular_buffer_t *ptr = NULL;
if (buffer) {
if (buffer->cb) {
for (i = 0; i < buffer->used_len ; i++) {
- if (buffer->cb[i])
- GF_FREE (buffer->cb[i]);
+ ptr = buffer->cb[i];
+ if (ptr->data) {
+ cb_destroy_data (ptr,
+ buffer->destroy_buffer_data);
+ ptr->data = NULL;
+ GF_FREE (ptr);
+ }
}
GF_FREE (buffer->cb);
}
diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/circ-buff.h
index 5b5acc387..e3459f5e3 100644
--- a/libglusterfs/src/circ-buff.h
+++ b/libglusterfs/src/circ-buff.h
@@ -38,7 +38,7 @@ struct _buffer {
/* indicates the amount of circular buffer used. */
circular_buffer_t **cb;
-
+ void (*destroy_buffer_data) (void *data);
pthread_mutex_t lock;
};
@@ -51,7 +51,8 @@ void
cb_buffer_show (buffer_t *buffer);
buffer_t *
-cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once);
+cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once,
+ void (*destroy_data) (void *data));
void
cb_buffer_destroy (buffer_t *buffer);
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
new file mode 100644
index 000000000..06447dc5d
--- /dev/null
+++ b/libglusterfs/src/client_t.c
@@ -0,0 +1,890 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs.h"
+#include "dict.h"
+#include "statedump.h"
+#include "client_t.h"
+#include "list.h"
+#include "rpcsvc.h"
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+/*
+ * Link entries[startidx] .. entries[endcount - 1] into a free list by
+ * pointing each entry's next_free at the following index; the final
+ * entry is terminated with GF_CLIENTTABLE_END.
+ *
+ * Returns 0 on success, -1 when entries is NULL.
+ */
+static int
+gf_client_chain_client_entries (cliententry_t *entries, uint32_t startidx,
+                                uint32_t endcount)
+{
+        uint32_t        slot = startidx;
+
+        if (entries == NULL) {
+                gf_log_callingfn ("client_t", GF_LOG_WARNING, "!entries");
+                return -1;
+        }
+
+        /* Stop one short of the end: the last entry gets the terminator
+         * rather than a link to a successor.
+         */
+        while (slot < (endcount - 1)) {
+                entries[slot].next_free = slot + 1;
+                slot++;
+        }
+
+        /* slot now addresses the last entry of the range. */
+        entries[slot].next_free = GF_CLIENTTABLE_END;
+
+        return 0;
+}
+
+
+/*
+ * Grow the client table so that it can hold at least nr entries.
+ *
+ * The requested size is rounded so that the entry array occupies a
+ * power-of-two number of 1KB units.  Existing entries are copied into
+ * the new array, the newly added tail is chained into a free list and
+ * first_free is pointed at the first new entry.
+ *
+ * The new array is built completely before the table is touched, so on
+ * failure the table is left exactly as it was.
+ *
+ * Returns 0 on success, EINVAL for a bad argument, ENOMEM when the new
+ * array cannot be allocated.
+ */
+static int
+gf_client_clienttable_expand (clienttable_t *clienttable, uint32_t nr)
+{
+        cliententry_t   *oldclients     = NULL;
+        cliententry_t   *newclients     = NULL;
+        uint32_t         oldmax_clients = 0;
+        int              ret            = -1;
+
+        if (clienttable == NULL) {
+                gf_log_callingfn ("client_t", GF_LOG_ERROR, "invalid argument");
+                ret = EINVAL;
+                goto out;
+        }
+
+        /* expand size by power-of-two...
+           this originally came from .../xlators/protocol/server/src/server.c
+           where it was not commented */
+        nr /= (1024 / sizeof (cliententry_t));
+        nr = gf_roundup_next_power_of_two (nr + 1);
+        nr *= (1024 / sizeof (cliententry_t));
+
+        oldclients = clienttable->cliententries;
+        oldmax_clients = clienttable->max_clients;
+
+        /* The old entries are copied wholesale below; refuse a request
+         * that would not leave room for them plus at least one new slot.
+         */
+        if (nr <= oldmax_clients) {
+                gf_log_callingfn ("client_t", GF_LOG_ERROR, "invalid argument");
+                ret = EINVAL;
+                goto out;
+        }
+
+        newclients = GF_CALLOC (nr, sizeof (cliententry_t),
+                                gf_common_mt_cliententry_t);
+        if (!newclients) {
+                ret = ENOMEM;
+                goto out;
+        }
+
+        if (oldclients)
+                memcpy (newclients, oldclients,
+                        (size_t) oldmax_clients * sizeof (cliententry_t));
+
+        gf_client_chain_client_entries (newclients, oldmax_clients, nr);
+
+        /* Now that expansion is done, publish the new array and update the
+         * free list head so that the client allocation functions can
+         * continue using the expanded table.
+         */
+        clienttable->cliententries = newclients;
+        clienttable->max_clients = nr;
+        clienttable->first_free = oldmax_clients;
+        GF_FREE (oldclients);
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Allocate a client table with its lock initialised and room for the
+ * initial set of entries.
+ *
+ * Returns the new table, or NULL when either the table or its entry
+ * array cannot be allocated.
+ */
+clienttable_t *
+gf_clienttable_alloc (void)
+{
+        clienttable_t *clienttable = NULL;
+
+        clienttable =
+                GF_CALLOC (1, sizeof (clienttable_t), gf_common_mt_clienttable_t);
+        if (!clienttable)
+                return NULL;
+
+        LOCK_INIT (&clienttable->lock);
+
+        /* A table without an entry array is unusable: gf_client_get()
+         * indexes cliententries unconditionally.  Fail the allocation
+         * instead of handing such a table out.
+         */
+        if (gf_client_clienttable_expand (clienttable,
+                                          GF_CLIENTTABLE_INITIAL_SIZE) != 0) {
+                gf_log ("client_t", GF_LOG_ERROR,
+                        "failed to allocate client table entries");
+                LOCK_DESTROY (&clienttable->lock);
+                GF_FREE (clienttable);
+                return NULL;
+        }
+
+        return clienttable;
+}
+
+
+/*
+ * Tear down a client table: detach the entry array under the table
+ * lock, drop one reference on every client still present in it, then
+ * release the array, the lock and the table itself.
+ *
+ * The table is released even when it has no entry array, so a table
+ * that was never populated does not leak.
+ */
+void
+gf_client_clienttable_destroy (clienttable_t *clienttable)
+{
+        client_t        *client        = NULL;
+        cliententry_t   *cliententries = NULL;
+        uint32_t         client_count  = 0;
+        uint32_t         i             = 0;
+
+        if (!clienttable) {
+                gf_log_callingfn ("client_t", GF_LOG_WARNING, "!clienttable");
+                return;
+        }
+
+        LOCK (&clienttable->lock);
+        {
+                client_count = clienttable->max_clients;
+                clienttable->max_clients = 0;
+                cliententries = clienttable->cliententries;
+                clienttable->cliententries = NULL;
+        }
+        UNLOCK (&clienttable->lock);
+
+        if (cliententries != NULL) {
+                /* NOTE(review): if an unref here drops the last reference,
+                 * client_destroy() indexes
+                 * client->this->ctx->clienttable->cliententries; when that
+                 * is this table the pointer was just cleared above --
+                 * confirm callers never reach that path.
+                 */
+                for (i = 0; i < client_count; i++) {
+                        client = cliententries[i].client;
+                        if (client != NULL) {
+                                gf_client_unref (client);
+                        }
+                }
+
+                GF_FREE (cliententries);
+        }
+
+        LOCK_DESTROY (&clienttable->lock);
+        GF_FREE (clienttable);
+}
+
+/*
+ * Look up, or create, the client_t for a connection.
+ *
+ * An existing client is reused only when its client_uid matches and the
+ * caller authenticated (flavour other than AUTH_NONE) with the same
+ * flavour and the same auth data; in that case both its bind count and
+ * its reference count are incremented.  Otherwise a new client is
+ * allocated, placed in the first free slot of this->ctx->clienttable
+ * (growing the table when it is full) and returned with a bind count of
+ * 1 and a reference count of 2.
+ *
+ * Returns the client, or NULL with errno set (EINVAL for a NULL
+ * argument, ENOMEM or the table-expansion error otherwise).
+ */
+client_t *
+gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid)
+{
+        client_t      *client      = NULL;
+        client_t      *candidate   = NULL;
+        cliententry_t *cliententry = NULL;
+        clienttable_t *clienttable = NULL;
+        unsigned int   i           = 0;
+        int            err         = 0;
+
+        if (this == NULL || cred == NULL || client_uid == NULL) {
+                gf_log_callingfn ("client_t", GF_LOG_ERROR, "invalid argument");
+                errno = EINVAL;
+                return NULL;
+        }
+
+        gf_log (this->name, GF_LOG_INFO, "client_uid=%s", client_uid);
+
+        clienttable = this->ctx->clienttable;
+
+        LOCK (&clienttable->lock);
+        {
+                /* Scan with a separate cursor so that a scan which ends
+                 * without a match leaves client NULL rather than pointing
+                 * at the last occupied slot.
+                 */
+                for (i = 0; i < clienttable->max_clients; i++) {
+                        candidate = clienttable->cliententries[i].client;
+                        if (candidate == NULL)
+                                continue;
+                        /*
+                         * look for matching client_uid, _and_
+                         * if auth was used, matching auth flavour and data
+                         */
+                        if (strcmp (client_uid, candidate->client_uid) == 0 &&
+                            (cred->flavour != AUTH_NONE &&
+                             (cred->flavour == candidate->auth.flavour &&
+                              (size_t) cred->datalen == candidate->auth.len &&
+                              memcmp (cred->authdata,
+                                      candidate->auth.data,
+                                      candidate->auth.len) == 0))) {
+                                client = candidate;
+                                break;
+                        }
+                }
+
+                if (client != NULL) {
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+                        __sync_add_and_fetch(&client->ref.bind, 1);
+#else
+                        LOCK (&client->ref.lock);
+                        {
+                                ++client->ref.bind;
+                        }
+                        UNLOCK (&client->ref.lock);
+#endif
+                        gf_client_ref (client);
+                        goto unlock;
+                }
+
+                /* No free slot left: grow the table before building the
+                 * client so that first_free is a valid index below.
+                 */
+                if (clienttable->first_free == GF_CLIENTTABLE_END) {
+                        err = gf_client_clienttable_expand (clienttable,
+                                        clienttable->max_clients +
+                                        GF_CLIENTTABLE_INITIAL_SIZE);
+                        if (err != 0) {
+                                errno = err;
+                                goto unlock;
+                        }
+                }
+
+                client = GF_CALLOC (1, sizeof(client_t), gf_common_mt_client_t);
+                if (client == NULL) {
+                        errno = ENOMEM;
+                        goto unlock;
+                }
+
+                client->this = this;
+
+                LOCK_INIT (&client->scratch_ctx.lock);
+                LOCK_INIT (&client->ref.lock);
+
+                client->client_uid = gf_strdup (client_uid);
+                if (client->client_uid == NULL)
+                        goto nomem;
+
+                client->scratch_ctx.count = GF_CLIENTCTX_INITIAL_SIZE;
+                client->scratch_ctx.ctx =
+                        GF_CALLOC (GF_CLIENTCTX_INITIAL_SIZE,
+                                   sizeof (struct client_ctx),
+                                   gf_common_mt_client_ctx);
+                if (client->scratch_ctx.ctx == NULL)
+                        goto nomem;
+
+                /* no need to do these atomically here */
+                client->ref.bind = client->ref.count = 1;
+
+                client->auth.flavour = cred->flavour;
+                if (cred->flavour != AUTH_NONE) {
+                        client->auth.data =
+                                GF_CALLOC (1, cred->datalen,
+                                           gf_common_mt_client_t);
+                        if (client->auth.data == NULL)
+                                goto nomem;
+                        memcpy (client->auth.data, cred->authdata,
+                                cred->datalen);
+                        client->auth.len = cred->datalen;
+                }
+
+                /* Take the head of the free list for this client. */
+                client->tbl_index = clienttable->first_free;
+                cliententry = &clienttable->cliententries[client->tbl_index];
+                cliententry->client = client;
+                clienttable->first_free = cliententry->next_free;
+                cliententry->next_free = GF_CLIENTENTRY_ALLOCATED;
+                gf_client_ref (client);
+                goto unlock;
+
+nomem:
+                /* Undo a partially built client.  Members that were never
+                 * allocated are still NULL from GF_CALLOC.
+                 */
+                LOCK_DESTROY (&client->scratch_ctx.lock);
+                LOCK_DESTROY (&client->ref.lock);
+                GF_FREE (client->auth.data);
+                GF_FREE (client->scratch_ctx.ctx);
+                GF_FREE (client->client_uid);
+                GF_FREE (client);
+                client = NULL;
+                errno = ENOMEM;
+        }
+unlock:
+        UNLOCK (&clienttable->lock);
+
+        return client;
+}
+
+/*
+ * Drop one bind reference on client.  When the bind count reaches zero
+ * the connection is considered shut down: *detached (if supplied) is
+ * set to _gf_true and one regular reference is released as well.
+ * In every other case *detached (if supplied) is set to _gf_false.
+ */
+void
+gf_client_put (client_t *client, gf_boolean_t *detached)
+{
+        int     remaining;
+
+        if (detached)
+                *detached = _gf_false;
+
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+        remaining = __sync_sub_and_fetch(&client->ref.bind, 1);
+#else
+        LOCK (&client->ref.lock);
+        {
+                remaining = --client->ref.bind;
+        }
+        UNLOCK (&client->ref.lock);
+#endif
+
+        /* Other binders are still attached; nothing more to do. */
+        if (remaining != 0)
+                return;
+
+        gf_log (THIS->name, GF_LOG_INFO, "Shutting down connection %s",
+                client->client_uid);
+        if (detached)
+                *detached = _gf_true;
+        gf_client_unref (client);
+}
+
+
+/*
+ * Take one reference on client and return it.  A NULL client is logged
+ * and NULL is returned.
+ */
+client_t *
+gf_client_ref (client_t *client)
+{
+        if (client != NULL) {
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+                __sync_add_and_fetch(&client->ref.count, 1);
+#else
+                LOCK (&client->ref.lock);
+                {
+                        ++client->ref.count;
+                }
+                UNLOCK (&client->ref.lock);
+#endif
+                return client;
+        }
+
+        gf_log_callingfn ("client_t", GF_LOG_ERROR, "null client");
+        return NULL;
+}
+
+
+/*
+ * Release a client whose reference count has dropped to zero: return
+ * its slot to the client table's free list, give every xlator in every
+ * graph a chance to clean up through its client_destroy callback, then
+ * destroy the client's locks and free its memory.
+ *
+ * The locks are destroyed only after the callbacks have run, because
+ * the callbacks are handed the client itself.
+ * NOTE(review): presumably callbacks use client_ctx_get/del, which take
+ * scratch_ctx.lock -- confirm against the xlator implementations.
+ */
+static void
+client_destroy (client_t *client)
+{
+        clienttable_t     *clienttable = NULL;
+        glusterfs_graph_t *gtrav       = NULL;
+        xlator_t          *xtrav       = NULL;
+
+        if (client == NULL){
+                gf_log_callingfn ("xlator", GF_LOG_ERROR, "invalid argument");
+                return;
+        }
+
+        clienttable = client->this->ctx->clienttable;
+
+        /* Push the client's slot back onto the head of the free list. */
+        LOCK (&clienttable->lock);
+        {
+                clienttable->cliententries[client->tbl_index].client = NULL;
+                clienttable->cliententries[client->tbl_index].next_free =
+                        clienttable->first_free;
+                clienttable->first_free = client->tbl_index;
+        }
+        UNLOCK (&clienttable->lock);
+
+        list_for_each_entry (gtrav, &client->this->ctx->graphs, list) {
+                xtrav = gtrav->top;
+                while (xtrav != NULL) {
+                        if (xtrav->cbks->client_destroy != NULL)
+                                xtrav->cbks->client_destroy (xtrav, client);
+                        xtrav = xtrav->next;
+                }
+        }
+
+        LOCK_DESTROY (&client->scratch_ctx.lock);
+        LOCK_DESTROY (&client->ref.lock);
+
+        GF_FREE (client->auth.data);
+        GF_FREE (client->scratch_ctx.ctx);
+        GF_FREE (client->client_uid);
+        GF_FREE (client);
+}
+
+
+/*
+ * Invoke the client_disconnect callback of every xlator in every graph
+ * of the client's context.
+ *
+ * Returns 0 when every callback returned 0, -1 when at least one did
+ * not.  All callbacks are invoked regardless of earlier failures.
+ */
+int
+gf_client_disconnect (client_t *client)
+{
+        glusterfs_graph_t *graph  = NULL;
+        xlator_t          *xl     = NULL;
+        int                status = 0;
+
+        list_for_each_entry (graph, &client->this->ctx->graphs, list) {
+                for (xl = graph->top; xl != NULL; xl = xl->next) {
+                        if (xl->cbks->client_disconnect != NULL &&
+                            xl->cbks->client_disconnect (xl, client) != 0) {
+                                status = -1;
+                        }
+                }
+        }
+
+        return status;
+}
+
+
+/*
+ * Drop one reference on client; the client is destroyed when the count
+ * reaches zero.  A NULL client is logged and ignored.
+ */
+void
+gf_client_unref (client_t *client)
+{
+        int     remaining;
+
+        if (client == NULL) {
+                gf_log_callingfn ("client_t", GF_LOG_ERROR, "client is NULL");
+                return;
+        }
+
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
+        remaining = __sync_sub_and_fetch(&client->ref.count, 1);
+#else
+        LOCK (&client->ref.lock);
+        {
+                remaining = --client->ref.count;
+        }
+        UNLOCK (&client->ref.lock);
+#endif
+
+        if (remaining != 0)
+                return;
+
+        client_destroy (client);
+}
+
+
+/*
+ * Store value under key in the client's scratch context.  A slot that
+ * already holds key is overwritten; otherwise the first empty slot is
+ * used.  The caller is expected to hold scratch_ctx.lock (see
+ * client_ctx_set).
+ *
+ * Returns 0 on success, -1 when key is absent and no slot is empty.
+ */
+static int
+client_ctx_set_int (client_t *client, void *key, void *value)
+{
+        struct client_ctx *slot   = NULL;
+        int                pos    = 0;
+        int                chosen = -1;
+
+        for (pos = 0; pos < client->scratch_ctx.count; pos++) {
+                slot = &client->scratch_ctx.ctx[pos];
+
+                if (slot->ctx_key == key) {
+                        /* key already present: reuse its slot */
+                        chosen = pos;
+                        break;
+                }
+
+                /* remember the first empty slot but keep scanning, the
+                   key may still exist further on */
+                if (!slot->ctx_key && chosen == -1)
+                        chosen = pos;
+        }
+
+        if (chosen == -1)
+                return -1;
+
+        client->scratch_ctx.ctx[chosen].ctx_key = key;
+        client->scratch_ctx.ctx[chosen].ctx_value = value;
+
+        return 0;
+}
+
+
+/*
+ * Locked wrapper around client_ctx_set_int().
+ *
+ * Returns -1 when client or key is NULL, otherwise the result of
+ * client_ctx_set_int().
+ */
+int
+client_ctx_set (client_t *client, void *key, void *value)
+{
+        int     status = 0;
+
+        if (client == NULL || key == NULL)
+                return -1;
+
+        LOCK (&client->scratch_ctx.lock);
+        {
+                status = client_ctx_set_int (client, key, value);
+        }
+        UNLOCK (&client->scratch_ctx.lock);
+
+        return status;
+}
+
+
+/*
+ * Find key in the client's scratch context and, when value is non-NULL,
+ * store the associated value in *value.  The caller is expected to hold
+ * scratch_ctx.lock (see client_ctx_get).
+ *
+ * Returns 0 when key was found, -1 otherwise.
+ */
+static int
+client_ctx_get_int (client_t *client, void *key, void **value)
+{
+        int     pos = 0;
+
+        for (pos = 0; pos < client->scratch_ctx.count; pos++) {
+                if (client->scratch_ctx.ctx[pos].ctx_key != key)
+                        continue;
+
+                if (value)
+                        *value = client->scratch_ctx.ctx[pos].ctx_value;
+                return 0;
+        }
+
+        return -1;
+}
+
+
+/*
+ * Locked wrapper around client_ctx_get_int().
+ *
+ * Returns -1 when client or key is NULL, otherwise the result of
+ * client_ctx_get_int().
+ */
+int
+client_ctx_get (client_t *client, void *key, void **value)
+{
+        int     status = 0;
+
+        if (client == NULL || key == NULL)
+                return -1;
+
+        LOCK (&client->scratch_ctx.lock);
+        {
+                status = client_ctx_get_int (client, key, value);
+        }
+        UNLOCK (&client->scratch_ctx.lock);
+
+        return status;
+}
+
+
+/*
+ * Remove key from the client's scratch context.  When value is
+ * non-NULL the value that was stored is handed back through *value
+ * before the slot is cleared.  The caller is expected to hold
+ * scratch_ctx.lock (see client_ctx_del).
+ *
+ * Returns 0 when key was found and removed, -1 otherwise.
+ */
+static int
+client_ctx_del_int (client_t *client, void *key, void **value)
+{
+        struct client_ctx *slot = NULL;
+        int                pos  = 0;
+
+        for (pos = 0; pos < client->scratch_ctx.count; pos++) {
+                slot = &client->scratch_ctx.ctx[pos];
+                if (slot->ctx_key != key)
+                        continue;
+
+                if (value)
+                        *value = slot->ctx_value;
+
+                slot->ctx_key = 0;
+                slot->ctx_value = 0;
+                return 0;
+        }
+
+        return -1;
+}
+
+
+/*
+ * Locked wrapper around client_ctx_del_int().
+ *
+ * Returns -1 when client or key is NULL, otherwise the result of
+ * client_ctx_del_int().
+ */
+int
+client_ctx_del (client_t *client, void *key, void **value)
+{
+        int     status = 0;
+
+        if (client == NULL || key == NULL)
+                return -1;
+
+        LOCK (&client->scratch_ctx.lock);
+        {
+                status = client_ctx_del_int (client, key, value);
+        }
+        UNLOCK (&client->scratch_ctx.lock);
+
+        return status;
+}
+
+
+/*
+ * Write the client's reference count to the state dump under the key
+ * "refcount".  prefix is accepted for symmetry with the other dump
+ * helpers but is not used.
+ */
+void
+client_dump (client_t *client, char *prefix)
+{
+        if (!client)
+                return;
+
+        gf_proc_dump_write("refcount", "%d", client->ref.count);
+}
+
+
+/*
+ * Dump the client held by a table entry.  Entries that are not marked
+ * GF_CLIENTENTRY_ALLOCATED, or that hold no client, are skipped.
+ */
+void
+cliententry_dump (cliententry_t *cliententry, char *prefix)
+{
+        if (cliententry == NULL)
+                return;
+
+        if (cliententry->next_free == GF_CLIENTENTRY_ALLOCATED &&
+            cliententry->client != NULL) {
+                client_dump(cliententry->client, prefix);
+        }
+}
+
+
+/*
+ * Write the client table to the state dump: the table size, the head
+ * of the free list, and one section per allocated entry.
+ *
+ * The table lock is only tried, never waited for; when it cannot be
+ * taken a warning is logged and nothing is dumped.
+ */
+void
+clienttable_dump (clienttable_t *clienttable, char *prefix)
+{
+        unsigned int    i   = 0;
+        int             ret = -1;
+        char            key[GF_DUMP_MAX_BUF_LEN] = {0};
+
+        if (!clienttable)
+                return;
+
+        ret = TRY_LOCK (&clienttable->lock);
+        {
+                if (ret) {
+                        gf_log ("client_t", GF_LOG_WARNING,
+                                "Unable to acquire lock");
+                        return;
+                }
+                memset(key, 0, sizeof key);
+                gf_proc_dump_build_key(key, prefix, "maxclients");
+                /* max_clients is unsigned */
+                gf_proc_dump_write(key, "%u", clienttable->max_clients);
+                gf_proc_dump_build_key(key, prefix, "first_free");
+                gf_proc_dump_write(key, "%d", clienttable->first_free);
+                for (i = 0; i < clienttable->max_clients; i++) {
+                        if (GF_CLIENTENTRY_ALLOCATED ==
+                            clienttable->cliententries[i].next_free) {
+                                gf_proc_dump_build_key(key, prefix,
+                                                       "cliententry[%u]", i);
+                                gf_proc_dump_add_section(key);
+                                cliententry_dump(&clienttable->cliententries[i],
+                                                 key);
+                        }
+                }
+        }
+        UNLOCK(&clienttable->lock);
+}
+
+
+/*
+ * Placeholder for dumping a client's per-xlator context.
+ *
+ * The whole body is compiled out with "#if 0", so this function
+ * currently does nothing.  The disabled code refers to members
+ * (client->ctx, client->ctx_lock, client->inode, xl_key) that client_t
+ * and struct client_ctx as declared in client_t.h do not have, so it
+ * cannot simply be re-enabled.
+ */
+void
+client_ctx_dump (client_t *client, char *prefix)
+{
+#if 0 /* TBD, FIXME */
+ struct client_ctx *client_ctx = NULL;
+ xlator_t *xl = NULL;
+ int i = 0;
+
+ if ((client == NULL) || (client->ctx == NULL)) {
+ goto out;
+ }
+
+ /* snapshot the context array under the lock ... */
+ LOCK (&client->ctx_lock);
+ if (client->ctx != NULL) {
+ client_ctx = GF_CALLOC (client->inode->table->xl->graph->ctx_count,
+ sizeof (*client_ctx),
+ gf_common_mt_client_ctx);
+ if (client_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ client_ctx[i] = client->ctx[i];
+ }
+ }
+unlock:
+ UNLOCK (&client->ctx_lock);
+
+ if (client_ctx == NULL) {
+ goto out;
+ }
+
+ /* ... then call each owning xlator's dump hook without holding it */
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ if (client_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)client_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->clientctx)
+ xl->dumpops->clientctx (xl, client);
+ }
+ }
+out:
+ GF_FREE (client_ctx);
+#endif
+}
+
+
+/*
+ * the following functions are here to preserve legacy behavior of the
+ * protocol/server xlator dump, but perhaps they should just be folded
+ * into the client dump instead?
+ */
+/*
+ * Record per-connection fd tables in dict.
+ *
+ * The part that walks the client table is compiled only when NOTYET is
+ * defined; without it count stays 0 and the function merely stores
+ * "conncount" = 0 in dict.
+ *
+ * Returns the result of dict_set_int32(), or -1 when the context has no
+ * client table.
+ */
+int
+gf_client_dump_fdtables_to_dict (xlator_t *this, dict_t *dict)
+{
+ clienttable_t *clienttable = NULL;
+ int count = 0;
+ int ret = -1;
+#ifdef NOTYET
+ client_t *client = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+#endif
+
+ /* presumably these jump to out (ret == -1) on a NULL pointer --
+  * confirm against the GF_VALIDATE_OR_GOTO definition */
+ GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+#ifdef NOTYET
+ ret = TRY_LOCK (&clienttable->lock);
+ {
+ if (ret) {
+ gf_log ("client_t", GF_LOG_WARNING,
+ "Unable to acquire lock");
+ return -1;
+ }
+ for ( ; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ memset(key, 0, sizeof key);
+ /* NOTE(review): count is incremented here and again by the
+  * for statement, so the entry after every allocated one is
+  * skipped -- fix before enabling NOTYET */
+ snprintf (key, sizeof key, "conn%d", count++);
+ fdtable_dump_to_dict (client->server_ctx.fdtable,
+ key, dict);
+ }
+ }
+ UNLOCK(&clienttable->lock);
+#endif
+
+ ret = dict_set_int32 (dict, "conncount", count);
+out:
+ return ret;
+}
+
+/*
+ * Write one state-dump record per allocated client table entry: the
+ * client uid, its reference count and the name of its bound xlator.
+ * Keys are built as conn.<table index>.<field>.
+ *
+ * The walk starts at index 0, the same as the *_to_dict variants, so
+ * the client in the first slot is included.
+ *
+ * The table lock is only tried; when it is busy a warning is logged
+ * and -1 is returned.  Returns 0 on success, -1 on any failure.
+ */
+int
+gf_client_dump_fdtables (xlator_t *this)
+{
+        client_t       *client      = NULL;
+        clienttable_t  *clienttable = NULL;
+        int             count       = 0;
+        int             ret         = -1;
+        char            key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+
+        clienttable = this->ctx->clienttable;
+
+        if (!clienttable)
+                return -1;
+
+        ret = TRY_LOCK (&clienttable->lock);
+        {
+                if (ret) {
+                        gf_log ("client_t", GF_LOG_WARNING,
+                                "Unable to acquire lock");
+                        return -1;
+                }
+
+                for ( ; count < clienttable->max_clients; count++) {
+                        if (GF_CLIENTENTRY_ALLOCATED !=
+                            clienttable->cliententries[count].next_free)
+                                continue;
+                        client = clienttable->cliententries[count].client;
+                        /* defensive: never dereference an empty slot */
+                        if (client == NULL)
+                                continue;
+                        memset(key, 0, sizeof key);
+                        if (client->client_uid) {
+                                gf_proc_dump_build_key (key, "conn",
+                                                        "%d.id", count);
+                                gf_proc_dump_write (key, "%s",
+                                                    client->client_uid);
+                        }
+
+                        gf_proc_dump_build_key (key, "conn", "%d.ref",
+                                                count);
+                        gf_proc_dump_write (key, "%d", client->ref.count);
+                        if (client->bound_xl) {
+                                gf_proc_dump_build_key (key, "conn",
+                                                        "%d.bound_xl", count);
+                                gf_proc_dump_write (key, "%s",
+                                                    client->bound_xl->name);
+                        }
+
+#ifdef NOTYET
+                        gf_proc_dump_build_key (key, "conn","%d.id", count);
+                        fdtable_dump (client->server_ctx.fdtable, key);
+#endif
+                }
+        }
+
+        UNLOCK(&clienttable->lock);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/*
+ * Dump the inode table of every distinct bound xlator found in the
+ * client table into dict, under keys of the form conn<table index>,
+ * then store the final loop counter in dict as "conncount".
+ *
+ * The table lock is only tried; when it is busy a warning is logged
+ * and -1 is returned.  Otherwise the result of dict_set_int32() is
+ * returned.
+ */
+int
+gf_client_dump_inodes_to_dict (xlator_t *this, dict_t *dict)
+{
+        clienttable_t  *clienttable = NULL;
+        cliententry_t  *entry       = NULL;
+        client_t       *client      = NULL;
+        xlator_t       *last_xl     = NULL;
+        char            key[32]     = {0,};
+        int             count       = 0;
+        int             ret         = -1;
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+        GF_VALIDATE_OR_GOTO (this->name, dict, out);
+
+        clienttable = this->ctx->clienttable;
+        if (clienttable == NULL)
+                return -1;
+
+        ret = TRY_LOCK (&clienttable->lock);
+        {
+                if (ret) {
+                        gf_log ("client_t", GF_LOG_WARNING,
+                                "Unable to acquire lock");
+                        return -1;
+                }
+
+                for (count = 0; count < clienttable->max_clients; count++) {
+                        entry = &clienttable->cliententries[count];
+                        if (entry->next_free != GF_CLIENTENTRY_ALLOCATED)
+                                continue;
+
+                        client = entry->client;
+                        if (!client->bound_xl || !client->bound_xl->itable)
+                                continue;
+
+                        /* Presently every brick contains only one
+                         * bound_xl for all connections, so listing per
+                         * connection would duplicate the inode lists.
+                         * Skipping a bound_xl equal to the previous one
+                         * avoids that in the present case; the check
+                         * can be improved if the need arises.
+                         */
+                        if (client->bound_xl == last_xl)
+                                continue;
+                        last_xl = client->bound_xl;
+
+                        memset (key, 0, sizeof (key));
+                        snprintf (key, sizeof (key), "conn%d", count);
+                        inode_table_dump_to_dict (client->bound_xl->itable,
+                                                  key, dict);
+                }
+        }
+        UNLOCK(&clienttable->lock);
+
+        ret = dict_set_int32 (dict, "conncount", count);
+
+out:
+        return ret;
+}
+
+/*
+ * Write the inode table of every distinct bound xlator found in the
+ * client table to the state dump, under keys of the form
+ * conn.<table index>.bound_xl.<xlator name>.
+ *
+ * The walk starts at index 0, the same as gf_client_dump_inodes_to_dict,
+ * so the client in the first slot is included.
+ *
+ * The table lock is only tried; when it is busy a warning is logged
+ * and the non-zero TRY_LOCK result is returned.  Returns 0 on success
+ * and -1 when there is no client table.
+ */
+int
+gf_client_dump_inodes (xlator_t *this)
+{
+        client_t       *client        = NULL;
+        clienttable_t  *clienttable   = NULL;
+        xlator_t       *prev_bound_xl = NULL;
+        int             count         = 0;
+        int             ret           = -1;
+        char            key[GF_DUMP_MAX_BUF_LEN] = {0,};
+
+        GF_VALIDATE_OR_GOTO (THIS->name, this, out);
+
+        clienttable = this->ctx->clienttable;
+
+        if (!clienttable)
+                goto out;
+
+        ret = TRY_LOCK (&clienttable->lock);
+        {
+                if (ret) {
+                        gf_log ("client_t", GF_LOG_WARNING,
+                                "Unable to acquire lock");
+                        goto out;
+                }
+
+                for ( ; count < clienttable->max_clients; count++) {
+                        if (GF_CLIENTENTRY_ALLOCATED !=
+                            clienttable->cliententries[count].next_free)
+                                continue;
+                        client = clienttable->cliententries[count].client;
+                        /* defensive: never dereference an empty slot */
+                        if (client == NULL)
+                                continue;
+                        memset(key, 0, sizeof key);
+                        if (client->bound_xl && client->bound_xl->itable) {
+                                /* Presently every brick contains only
+                                 * one bound_xl for all connections.
+                                 * This will lead to duplicating of
+                                 * the inode lists, if listing is
+                                 * done for every connection. This
+                                 * simple check prevents duplication
+                                 * in the present case. If need arises
+                                 * the check can be improved.
+                                 */
+                                if (client->bound_xl == prev_bound_xl)
+                                        continue;
+                                prev_bound_xl = client->bound_xl;
+
+                                gf_proc_dump_build_key(key, "conn",
+                                                       "%d.bound_xl.%s", count,
+                                                       client->bound_xl->name);
+                                inode_table_dump(client->bound_xl->itable,key);
+                        }
+                }
+        }
+        UNLOCK(&clienttable->lock);
+
+        ret = 0;
+out:
+        return ret;
+}
+
diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/client_t.h
new file mode 100644
index 000000000..f7812f8f0
--- /dev/null
+++ b/libglusterfs/src/client_t.h
@@ -0,0 +1,135 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CLIENT_T_H
+#define _CLIENT_T_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "locking.h" /* for gf_lock_t, not included by glusterfs.h */
+
+/*
+ * One key/value slot of a client's scratch context.  Keys are compared
+ * by pointer value; a NULL ctx_key marks the slot as empty.
+ */
+struct client_ctx {
+ void *ctx_key;
+ void *ctx_value;
+};
+
+/*
+ * State kept for one client connection.  Instances are created by
+ * gf_client_get() and stored in the context's client table.
+ */
+typedef struct _client_t {
+ /* key/value store accessed through client_ctx_set/get/del;
+  * count is the number of slots in ctx */
+ struct {
+ /* e.g. protocol/server stashes its ctx here */
+ gf_lock_t lock;
+ unsigned short count;
+ struct client_ctx *ctx;
+ } scratch_ctx;
+ /* bind is adjusted by gf_client_get/gf_client_put, count by
+  * gf_client_ref/gf_client_unref; lock is used only when the
+  * compiler's atomic builtins are not available */
+ struct {
+ gf_lock_t lock;
+ volatile int bind;
+ volatile int count;
+ } ref;
+ xlator_t *bound_xl;
+ /* xlator passed to gf_client_get() */
+ xlator_t *this;
+ /* index of this client's entry in the client table */
+ int tbl_index;
+ char *client_uid;
+ /* copy of the credentials the client was created with; data is
+  * only allocated when flavour is not AUTH_NONE */
+ struct {
+ int flavour;
+ size_t len;
+ char *data;
+ } auth;
+} client_t;
+
+#define GF_CLIENTCTX_INITIAL_SIZE 8
+
+/*
+ * One slot of the client table.  While the slot is free, next_free is
+ * the index of the next free slot or GF_CLIENTTABLE_END; while it is
+ * in use, client is set and next_free is GF_CLIENTENTRY_ALLOCATED.
+ */
+struct client_table_entry {
+ client_t *client;
+ int next_free;
+};
+typedef struct client_table_entry cliententry_t;
+
+/*
+ * Table of all clients of a context.  cliententries holds max_clients
+ * slots; first_free is the index of the head of the free-slot list, or
+ * GF_CLIENTTABLE_END when no slot is free.  lock guards the table.
+ */
+struct clienttable {
+ unsigned int max_clients;
+ gf_lock_t lock;
+ cliententry_t *cliententries;
+ int first_free;
+};
+typedef struct clienttable clienttable_t;
+
+#define GF_CLIENTTABLE_INITIAL_SIZE 32
+
+/* Signifies no more entries in the client table. */
+#define GF_CLIENTTABLE_END -1
+
+/* This is used to invalidate
+ * the next_free value in a cliententry that has been allocated
+ */
+#define GF_CLIENTENTRY_ALLOCATED -2
+
+struct rpcsvc_auth_data;
+
+client_t *
+gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid);
+
+void
+gf_client_put (client_t *client, gf_boolean_t *detached);
+
+clienttable_t *
+gf_clienttable_alloc (void);
+
+void
+gf_client_clienttable_destroy (clienttable_t *clienttable);
+
+client_t *
+gf_client_ref (client_t *client);
+
+void
+gf_client_unref (client_t *client);
+
+/*
+ * NOTE(review): client_t.c defines gf_client_dump_fdtables_to_dict()
+ * and gf_client_dump_fdtables() (plural), which are declared further
+ * down; no definition of the two singular names below is visible --
+ * confirm whether anything still references them.
+ */
+int
+gf_client_dump_fdtable_to_dict (xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtable (xlator_t *this);
+
+int
+client_ctx_set (client_t *client, void *key, void *value);
+
+int
+client_ctx_get (client_t *client, void *key, void **value);
+
+int
+client_ctx_del (client_t *client, void *key, void **value);
+
+void
+client_ctx_dump (client_t *client, char *prefix);
+
+int
+gf_client_dump_fdtables_to_dict (xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtables (xlator_t *this);
+
+int
+gf_client_dump_inodes_to_dict (xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_inodes (xlator_t *this);
+
+int
+gf_client_disconnect (client_t *client);
+
+#endif /* _CLIENT_T_H */
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index c9c396762..827475282 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -44,6 +44,8 @@
#include "stack.h"
#include "globals.h"
#include "lkowner.h"
+#include "syscall.h"
+#include <ifaddrs.h>
#ifndef AI_ADDRCONFIG
#define AI_ADDRCONFIG 0
@@ -57,6 +59,16 @@ struct dnscache6 {
struct addrinfo *next;
};
+/*
+ * Compute the MD5 digest of data[0..len) and write it to md5 as a
+ * NUL-terminated lowercase hex string.  md5 must have room for
+ * MD5_DIGEST_LENGTH * 2 + 1 bytes.
+ */
+void
+md5_wrapper(const unsigned char *data, size_t len, char *md5)
+{
+        unsigned char   digest[MD5_DIGEST_LENGTH] = {0,};
+        char           *out  = md5;
+        size_t          room = MD5_DIGEST_LENGTH * 2 + 1;
+        int             byte = 0;
+
+        MD5(data, len, digest);
+
+        /* two hex characters per digest byte; room shrinks in step so
+         * snprintf can never write past the caller's buffer */
+        for (byte = 0; byte < MD5_DIGEST_LENGTH; byte++) {
+                snprintf(out, room, "%02x", digest[byte]);
+                out  += 2;
+                room -= 2;
+        }
+}
/* works similar to mkdir(1) -p.
*/
@@ -113,6 +125,35 @@ out:
}
int
+gf_lstat_dir (const char *path, struct stat *stbuf_in)
+{
+ int ret = -1;
+ struct stat stbuf = {0,};
+
+ if (path == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = sys_lstat (path, &stbuf);
+ if (ret)
+ goto out;
+
+ if (!S_ISDIR (stbuf.st_mode)) {
+ errno = ENOTDIR;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (!ret && stbuf_in)
+ *stbuf_in = stbuf;
+
+ return ret;
+}
+
+int
log_base2 (unsigned long x)
{
int val = 0;
@@ -167,7 +208,9 @@ gf_resolve_ip6 (const char *hostname,
memset(&hints, 0, sizeof(hints));
hints.ai_family = family;
hints.ai_socktype = SOCK_STREAM;
+#ifndef __NetBSD__
hints.ai_flags = AI_ADDRCONFIG;
+#endif
ret = gf_asprintf (&port_str, "%d", port);
if (-1 == ret) {
@@ -236,28 +279,101 @@ err:
}
+/*
+ * State shared by the graph dump helpers: the stream being written to
+ * and the number of the last line written by nprintf().
+ */
+struct xldump {
+ int lineno;
+ FILE *logfp;
+};
+
+
+/*
+ * Write one numbered line to dump->logfp: the incremented line number,
+ * the printf-style formatted text, and a trailing newline.
+ *
+ * Returns the sum of the three stdio return values.
+ */
+static int
+nprintf (struct xldump *dump, const char *fmt, ...)
+{
+        va_list         args;
+        int             written = 0;
+
+        dump->lineno++;
+        written = fprintf (dump->logfp, "%3d: ", dump->lineno);
+
+        va_start (args, fmt);
+        written += vfprintf (dump->logfp, fmt, args);
+        va_end (args);
+
+        written += fprintf (dump->logfp, "\n");
+
+        return written;
+}
+
+
+/*
+ * dict_foreach callback: write one "option <key> <value>" line for an
+ * xlator option.  d is the struct xldump being written to.  Always
+ * returns 0.
+ */
+static int
+xldump_options (dict_t *this, char *key, data_t *value, void *d)
+{
+        struct xldump  *dump = d;
+
+        nprintf (dump, " option %s %s", key, value->data);
+
+        return 0;
+}
+
+
+/*
+ * Write the "subvolumes a b c" line listing the names of this
+ * xlator's children, separated by single spaces.  Nothing is written
+ * when the xlator has no children.  d is the struct xldump being
+ * written to.
+ */
+static void
+xldump_subvolumes (xlator_t *this, void *d)
+{
+        xlator_list_t  *subv    = NULL;
+        size_t          size    = 0;
+        size_t          used    = 0;
+        char           *subvstr = NULL;
+
+        if (!this->children)
+                return;
+
+        /* each name plus one byte: a separating space, or the
+         * terminating NUL after the last name */
+        for (subv = this->children; subv; subv = subv->next)
+                size += strlen (subv->xlator->name) + 1;
+
+        subvstr = GF_CALLOC (1, size, gf_common_mt_strdup);
+        if (!subvstr)
+                return; /* best-effort dump: skip the line when out of memory */
+
+        for (subv = this->children; subv; subv = subv->next)
+                used += snprintf (subvstr + used, size - used, "%s%s",
+                                  subv->xlator->name,
+                                  subv->next ? " " : "");
+
+        nprintf (d, " subvolumes %s", subvstr);
+
+        GF_FREE (subvstr);
+}
+
+
+/*
+ * Write one xlator in volfile form: its name and type, every option,
+ * its subvolumes, an "end-volume" line and an empty line.  d is the
+ * struct xldump being written to.
+ */
+static void
+xldump (xlator_t *each, void *d)
+{
+        struct xldump  *dump = d;
+
+        nprintf (dump, "volume %s", each->name);
+        nprintf (dump, " type %s", each->type);
+
+        dict_foreach (each->options, xldump_options, dump);
+        xldump_subvolumes (each, dump);
+
+        nprintf (dump, "end-volume");
+        nprintf (dump, "");
+}
+
+
void
-gf_log_volume_file (FILE *specfp)
+gf_log_dump_graph (FILE *specfp, glusterfs_graph_t *graph)
{
- extern FILE *gf_log_logfile;
- int lcount = 0;
- char data[GF_UNIT_KB];
+ glusterfs_ctx_t *ctx;
+ struct xldump xld = {0, };
- fseek (specfp, 0L, SEEK_SET);
- fprintf (gf_log_logfile, "Given volfile:\n");
- fprintf (gf_log_logfile,
+ ctx = THIS->ctx;
+ xld.logfp = ctx->log.gf_log_logfile;
+
+ fprintf (ctx->log.gf_log_logfile, "Final graph:\n");
+ fprintf (ctx->log.gf_log_logfile,
"+---------------------------------------"
"---------------------------------------+\n");
- while (fgets (data, GF_UNIT_KB, specfp) != NULL){
- lcount++;
- fprintf (gf_log_logfile, "%3d: %s", lcount, data);
- }
- fprintf (gf_log_logfile,
- "\n+---------------------------------------"
+
+ xlator_foreach_depth_first (graph->top, xldump, &xld);
+
+ fprintf (ctx->log.gf_log_logfile,
+ "+---------------------------------------"
"---------------------------------------+\n");
- fflush (gf_log_logfile);
- fseek (specfp, 0L, SEEK_SET);
+ fflush (ctx->log.gf_log_logfile);
}
static void
@@ -392,26 +508,31 @@ out:
/* Obtain a backtrace and print it to stdout. */
/* TODO: It looks like backtrace_symbols allocates memory,
it may be problem because mostly memory allocation/free causes 'sigsegv' */
+
void
-gf_print_trace (int32_t signum)
+gf_print_trace (int32_t signum, glusterfs_ctx_t *ctx)
{
- extern FILE *gf_log_logfile;
- struct tm *tm = NULL;
char msg[1024] = {0,};
- char timestr[256] = {0,};
- time_t utime = 0;
+ char timestr[64] = {0,};
int ret = 0;
int fd = 0;
- fd = fileno (gf_log_logfile);
+ fd = fileno (ctx->log.gf_log_logfile);
+ /* Now every gf_log call will just write to a buffer and when the
+ * buffer becomes full, its written to the log-file. Suppose the process
+ * crashes and prints the backtrace in the log-file, then the previous
+ * log information will still be in the buffer itself. So flush the
+ * contents of the buffer to the log file before printing the backtrace
+ * which helps in debugging.
+ */
+ fflush (ctx->log.gf_log_logfile);
/* Pending frames, (if any), list them in order */
ret = write (fd, "pending frames:\n", 16);
if (ret < 0)
goto out;
{
- glusterfs_ctx_t *ctx = glusterfs_ctx_get ();
struct list_head *trav = ((call_pool_t *)ctx->pool)->all_frames.next;
while (trav != (&((call_pool_t *)ctx->pool)->all_frames)) {
call_frame_t *tmp = (call_frame_t *)(&((call_stack_t *)trav)->frames);
@@ -419,10 +540,10 @@ gf_print_trace (int32_t signum)
sprintf (msg,"frame : type(%d) op(%s)\n",
tmp->root->type,
gf_fop_list[tmp->root->op]);
- if (tmp->root->type == GF_OP_TYPE_MGMT)
- sprintf (msg,"frame : type(%d) op(%s)\n",
+ else
+ sprintf (msg,"frame : type(%d) op(%d)\n",
tmp->root->type,
- gf_mgmt_list[tmp->root->op]);
+ tmp->root->op);
ret = write (fd, msg, strlen (msg));
if (ret < 0)
@@ -448,9 +569,7 @@ gf_print_trace (int32_t signum)
{
/* Dump the timestamp of the crash too, so the previous logs
can be related */
- utime = time (NULL);
- tm = localtime (&utime);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S\n", tm);
+ gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_FT);
ret = write (fd, "time of crash: ", 15);
if (ret < 0)
goto out;
@@ -606,8 +725,7 @@ gf_strstr (const char *str, const char *delim, const char *match)
}
out:
- if (tmp_str)
- free (tmp_str);
+ free (tmp_str);
return ret;
@@ -660,6 +778,8 @@ gf_string2time (const char *str, uint32_t *n)
old_errno = errno;
errno = 0;
value = strtol (str, &tail, 0);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -678,11 +798,10 @@ gf_string2time (const char *str, uint32_t *n)
return 0;
}
-
int
-gf_string2percent (const char *str, uint32_t *n)
+gf_string2percent (const char *str, double *n)
{
- unsigned long value = 0;
+ double value = 0;
char *tail = NULL;
int old_errno = 0;
const char *s = NULL;
@@ -703,7 +822,9 @@ gf_string2percent (const char *str, uint32_t *n)
old_errno = errno;
errno = 0;
- value = strtol (str, &tail, 0);
+ value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -737,6 +858,8 @@ _gf_string2long (const char *str, long *n, int base)
old_errno = errno;
errno = 0;
value = strtol (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -777,6 +900,8 @@ _gf_string2ulong (const char *str, unsigned long *n, int base)
old_errno = errno;
errno = 0;
value = strtoul (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -817,6 +942,8 @@ _gf_string2uint (const char *str, unsigned int *n, int base)
old_errno = errno;
errno = 0;
value = strtoul (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -848,6 +975,8 @@ _gf_string2double (const char *str, double *n)
old_errno = errno;
errno = 0;
value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -879,6 +1008,8 @@ _gf_string2longlong (const char *str, long long *n, int base)
old_errno = errno;
errno = 0;
value = strtoll (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -919,6 +1050,8 @@ _gf_string2ulonglong (const char *str, unsigned long long *n, int base)
old_errno = errno;
errno = 0;
value = strtoull (str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -992,7 +1125,7 @@ gf_string2int8 (const char *str, int8_t *n)
if (rv != 0)
return rv;
- if (l >= INT8_MIN && l <= INT8_MAX) {
+ if ((l >= INT8_MIN) && (l <= INT8_MAX)) {
*n = (int8_t) l;
return 0;
}
@@ -1011,7 +1144,7 @@ gf_string2int16 (const char *str, int16_t *n)
if (rv != 0)
return rv;
- if (l >= INT16_MIN && l <= INT16_MAX) {
+ if ((l >= INT16_MIN) && (l <= INT16_MAX)) {
*n = (int16_t) l;
return 0;
}
@@ -1030,7 +1163,7 @@ gf_string2int32 (const char *str, int32_t *n)
if (rv != 0)
return rv;
- if (l >= INT32_MIN && l <= INT32_MAX) {
+ if ((l >= INT32_MIN) && (l <= INT32_MAX)) {
*n = (int32_t) l;
return 0;
}
@@ -1049,7 +1182,7 @@ gf_string2int64 (const char *str, int64_t *n)
if (rv != 0)
return rv;
- if (l >= INT64_MIN && l <= INT64_MAX) {
+ if ((l >= INT64_MIN) && (l <= INT64_MAX)) {
*n = (int64_t) l;
return 0;
}
@@ -1261,7 +1394,7 @@ err:
int
gf_string2bytesize (const char *str, uint64_t *n)
{
- uint64_t value = 0ULL;
+ double value = 0.0;
char *tail = NULL;
int old_errno = 0;
const char *s = NULL;
@@ -1282,7 +1415,9 @@ gf_string2bytesize (const char *str, uint64_t *n)
old_errno = errno;
errno = 0;
- value = strtoull (str, &tail, 10);
+ value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -1306,7 +1441,12 @@ gf_string2bytesize (const char *str, uint64_t *n)
return -1;
}
- *n = value;
+ if ((UINT64_MAX - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+
+ *n = (uint64_t) value;
return 0;
}
@@ -1316,7 +1456,7 @@ gf_string2percent_or_bytesize (const char *str,
uint64_t *n,
gf_boolean_t *is_percent)
{
- uint64_t value = 0ULL;
+ double value = 0ULL;
char *tail = NULL;
int old_errno = 0;
const char *s = NULL;
@@ -1338,7 +1478,9 @@ gf_string2percent_or_bytesize (const char *str,
old_errno = errno;
errno = 0;
- value = strtoull (str, &tail, 10);
+ value = strtod (str, &tail);
+ if (str == tail)
+ errno = EINVAL;
if (errno == ERANGE || errno == EINVAL)
return -1;
@@ -1363,7 +1505,13 @@ gf_string2percent_or_bytesize (const char *str,
return -1;
}
- *n = value;
+ /* Error out if we cannot store the value in uint64 */
+ if ((UINT64_MAX - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+
+ *n = (uint64_t) value;
return 0;
}
@@ -1629,11 +1777,9 @@ get_nth_word (const char *str, int n)
return word;
}
-/* RFC 1123 & 952 */
-/* Syntax formed combining RFC 1123 & 952 *
- <hname> ::= <first-name>*["."<gen-name>] *
- <first-name> ::= <let-or-digit> <[*[<let-or-digit-or-hyphen>]<let-or-digit>]
- <gen-name> ::= <let>[*[<let-or-digit-or-hyphen>]<let-or-digit>] */
+/* Syntax formed according to RFC 1912 (RFC 1123 & 952 are more restrictive) *
+ <hname> ::= <gen-name>*["."<gen-name>] *
+ <gen-name> ::= <let-or-digit> <[*[<let-or-digit-or-hyphen>]<let-or-digit>] */
char
valid_host_name (char *address, int length)
{
@@ -1644,7 +1790,7 @@ valid_host_name (char *address, int length)
char *temp_str = NULL;
char *save_ptr = NULL;
- if ((length > _POSIX_HOST_NAME_MAX) || (length == 1)) {
+ if ((length > _POSIX_HOST_NAME_MAX) || (length < 1)) {
ret = 0;
goto out;
}
@@ -1654,27 +1800,26 @@ valid_host_name (char *address, int length)
ret = 0;
goto out;
}
- temp_str = strtok_r (dup_addr,".", &save_ptr);
- /* first-name */
- if (!temp_str ||
- !isalnum(temp_str[0]) ||
- !isalnum (temp_str[strlen(temp_str)-1])) {
+ if (!isalnum (dup_addr[length - 1]) && (dup_addr[length - 1] != '*')) {
ret = 0;
goto out;
}
- for (i = 1; i < (strlen (temp_str) - 1); i++) {
- if (!isalnum (temp_str[i]) && (temp_str[i] != '-')) {
- ret = 0;
- goto out;
- }
+
+ /* Check for consecutive dots, which is invalid in a hostname and is
+ * ignored by strtok()
+ */
+ if (strstr (dup_addr, "..")) {
+ ret = 0;
+ goto out;
}
/* gen-name */
- while ((temp_str = strtok_r (NULL, ".", &save_ptr))) {
+ temp_str = strtok_r (dup_addr, ".", &save_ptr);
+ do {
str_len = strlen (temp_str);
- if (!isalpha (temp_str[0]) ||
+ if (!isalnum (temp_str[0]) ||
!isalnum (temp_str[str_len-1])) {
ret = 0;
goto out;
@@ -1685,11 +1830,10 @@ valid_host_name (char *address, int length)
goto out;
}
}
- }
+ } while ((temp_str = strtok_r (NULL, ".", &save_ptr)));
out:
- if (dup_addr)
- GF_FREE (dup_addr);
+ GF_FREE (dup_addr);
return ret;
}
@@ -1706,8 +1850,14 @@ valid_ipv4_address (char *address, int length, gf_boolean_t wildcard_acc)
tmp = gf_strdup (address);
- /* To prevent cases where last character is '.' */
- if (!isdigit (tmp[length - 1]) && (tmp[length - 1] != '*')) {
+ /*
+ * Reject addresses whose last character is '.' and addresses that
+ * contain consecutive dots like "..", because strtok ignores
+ * consecutive delimiters.
+ */
+ if (length <= 0 ||
+ (strstr (address, "..")) ||
+ (!isdigit (tmp[length - 1]) && (tmp[length - 1] != '*'))) {
ret = 0;
goto out;
}
@@ -1753,7 +1903,7 @@ valid_ipv6_address (char *address, int length, gf_boolean_t wildcard_acc)
tmp = gf_strdup (address);
/* Check for compressed form */
- if (tmp[length - 1] == ':') {
+ if (length <= 0 || tmp[length - 1] == ':') {
ret = 0;
goto out;
}
@@ -1819,11 +1969,55 @@ out:
return ret;
}
+/**
+ * gf_sock_union_equal_addr - compare the address part of two gf_sock_unions
+ *
+ * Only the address family and the IPv4/IPv6 address bytes are compared;
+ * ports are not looked at.
+ *
+ * @param a - first sock union
+ * @param b - second sock union
+ * @return _gf_true if a and b have the same family and the same ipv{4,6}
+ *         addr; _gf_false otherwise (including NULL arguments and address
+ *         families other than AF_INET/AF_INET6)
+ */
+gf_boolean_t
+gf_sock_union_equal_addr (union gf_sock_union *a,
+                          union gf_sock_union *b)
+{
+        gf_boolean_t same = _gf_false;
+
+        if (a == NULL || b == NULL) {
+                gf_log ("common-utils", GF_LOG_ERROR, "Invalid arguments"
+                        " to gf_sock_union_equal_addr");
+                return _gf_false;
+        }
+
+        if (a->storage.ss_family != b->storage.ss_family)
+                return _gf_false;
+
+        if (a->storage.ss_family == AF_INET) {
+                same = (a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr)
+                        ? _gf_true : _gf_false;
+        } else if (a->storage.ss_family == AF_INET6) {
+                same = (memcmp ((void *)(&a->sin6.sin6_addr),
+                                (void *)(&b->sin6.sin6_addr),
+                                sizeof (a->sin6.sin6_addr)) == 0)
+                        ? _gf_true : _gf_false;
+        } else {
+                gf_log ("common-utils", GF_LOG_DEBUG,
+                        "Unsupported/invalid address family");
+        }
+
+        return same;
+}
+
/*Thread safe conversion function*/
+/* Unparses uuid into the buffer returned by
+ * glusterfs_uuid_buf_get(THIS->ctx) and returns that same buffer.
+ * NOTE(review): the buffer's lifetime and the thread-safety claim above
+ * depend on glusterfs_uuid_buf_get(), which is not visible here - confirm
+ * before caching or freeing the returned pointer. */
char *
uuid_utoa (uuid_t uuid)
{
- char *uuid_buffer = glusterfs_uuid_buf_get();
+ char *uuid_buffer = glusterfs_uuid_buf_get(THIS->ctx);
uuid_unparse (uuid, uuid_buffer);
return uuid_buffer;
}
@@ -1842,7 +2036,7 @@ uuid_utoa_r (uuid_t uuid, char *dst)
+/* Unparses lkowner into the buffer returned by
+ * glusterfs_lkowner_buf_get(THIS->ctx), passing GF_LKOWNER_BUF_SIZE as the
+ * buffer size, and returns that same buffer.
+ * NOTE(review): ownership/lifetime of the buffer is decided by
+ * glusterfs_lkowner_buf_get(), which is not visible here - confirm before
+ * caching or freeing the returned pointer. */
char *
lkowner_utoa (gf_lkowner_t *lkowner)
{
- char *lkowner_buffer = glusterfs_lkowner_buf_get();
+ char *lkowner_buffer = glusterfs_lkowner_buf_get(THIS->ctx);
lkowner_unparse (lkowner, lkowner_buffer, GF_LKOWNER_BUF_SIZE);
return lkowner_buffer;
}
@@ -1919,8 +2113,7 @@ gf_is_str_int (const char *value)
}
out:
- if (fptr)
- GF_FREE (fptr);
+ GF_FREE (fptr);
return flag;
}
@@ -1930,9 +2123,9 @@ out:
*/
inline int32_t
-gf_roundup_power_of_two (uint32_t nr)
+gf_roundup_power_of_two (int32_t nr)
{
- uint32_t result = 1;
+ int32_t result = 1;
if (nr < 0) {
gf_log ("common-utils", GF_LOG_WARNING,
@@ -1954,7 +2147,7 @@ out:
*/
inline int32_t
-gf_roundup_next_power_of_two (uint32_t nr)
+gf_roundup_next_power_of_two (int32_t nr)
{
int32_t result = 1;
@@ -2088,9 +2281,6 @@ gf_strip_whitespace (char *str, int len)
return new_len;
}
-/* If the path exists use realpath(3) to handle extra slashes and to resolve
- * symlinks else strip the extra slashes in the path and return */
-
int
gf_canonicalize_path (char *path)
{
@@ -2129,8 +2319,590 @@ gf_canonicalize_path (char *path)
gf_log ("common-utils", GF_LOG_ERROR,
"Path manipulation failed");
- if (tmppath)
- GF_FREE(tmppath);
+ GF_FREE(tmppath);
return ret;
}
+
+/* strftime(3) format strings handed out by _gf_timestuff(); gf_time_fmt()
+ * indexes this table with a gf_timefmts value, so the entry order must
+ * follow that enum. */
+static const char *__gf_timefmts[] = {
+ "%F %T",
+ "%Y/%m/%d-%T",
+ "%b %d %T",
+ "%F %H%M%S"
+};
+
+/* One placeholder string per entry of __gf_timefmts, in the same order and
+ * with the same layout; also handed out by _gf_timestuff(). */
+static const char *__gf_zerotimes[] = {
+ "0000-00-00 00:00:00",
+ "0000/00/00-00:00:00",
+ "xxx 00 00:00:00",
+ "0000-00-00 000000"
+};
+
+/*
+ * Export the time-format tables of this file: stores gf_timefmt_last in
+ * *fmt, the strftime format table in *fmts and the matching placeholder
+ * table in *zeros.
+ */
+void
+_gf_timestuff (gf_timefmts *fmt, const char ***fmts, const char ***zeros)
+{
+        *zeros = __gf_zerotimes;
+        *fmts  = __gf_timefmts;
+        *fmt   = gf_timefmt_last;
+}
+
+
+/*
+ * Build an identifier of the form "<hostname>-<pid>-<timestamp>:<usec>",
+ * where <timestamp> is the current time rendered by gf_time_fmt() with
+ * gf_timefmt_Ymd_T.
+ *
+ * Failures of gettimeofday()/gethostname() are logged and the zeroed
+ * defaults are used instead (empty hostname, zero time).
+ *
+ * Returns the result of gf_strdup() on the formatted string.
+ */
+char *
+generate_glusterfs_ctx_id (void)
+{
+        char           tmp_str[1024] = {0,};
+        char           hostname[256] = {0,};
+        struct timeval tv            = {0,};
+        char           now_str[32]   = {0,};
+
+        if (gettimeofday (&tv, NULL) == -1) {
+                gf_log ("glusterfsd", GF_LOG_ERROR,
+                        "gettimeofday: failed %s",
+                        strerror (errno));
+        }
+
+        /* gethostname() need not NUL-terminate a truncated name; never hand
+         * it the last (zero-initialised) byte so hostname stays a string. */
+        if (gethostname (hostname, sizeof (hostname) - 1) == -1) {
+                gf_log ("glusterfsd", GF_LOG_ERROR,
+                        "gethostname: failed %s",
+                        strerror (errno));
+        }
+
+        gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
+
+        /* The width of tv_usec differs between platforms; widen it to long
+         * so a single format string is correct everywhere. */
+        snprintf (tmp_str, sizeof tmp_str, "%s-%d-%s:%ld",
+                  hostname, (int) getpid (), now_str, (long) tv.tv_usec);
+
+        return gf_strdup (tmp_str);
+}
+
+/*
+ * Read the reserved local ports from
+ * /proc/sys/net/ipv4/ip_local_reserved_ports (Linux only).
+ *
+ * Returns a gf_strdup()ed copy of the file contents (at most
+ * sizeof (buffer) - 1 bytes), or NULL when the file cannot be opened or
+ * read, when the copy fails, or on non-Linux hosts.
+ */
+char *
+gf_get_reserved_ports ()
+{
+        char    *ports_info   = NULL;
+#if defined GF_LINUX_HOST_OS
+        int      proc_fd      = -1;
+        char    *proc_file    = "/proc/sys/net/ipv4/ip_local_reserved_ports";
+        char     buffer[4096] = {0,};
+        int32_t  ret          = -1;
+
+        proc_fd = open (proc_file, O_RDONLY);
+        if (proc_fd == -1) {
+                /* What should be done in this case? error out from here
+                 * and thus stop the glusterfs process from starting or
+                 * continue with older method of using any of the available
+                 * port? For now 2nd option is considered.
+                 */
+                gf_log ("glusterfs", GF_LOG_WARNING, "could not open "
+                        "the file /proc/sys/net/ipv4/ip_local_reserved_ports "
+                        "for getting reserved ports info (%s)",
+                        strerror (errno));
+                goto out;
+        }
+
+        /* Leave the last byte of the zero-filled buffer untouched so the
+         * data is always NUL-terminated before it is treated as a string. */
+        ret = read (proc_fd, buffer, sizeof (buffer) - 1);
+        if (ret < 0) {
+                gf_log ("glusterfs", GF_LOG_WARNING, "could not "
+                        "read the file %s for getting reserved ports "
+                        "info (%s)", proc_file, strerror (errno));
+                goto out;
+        }
+        ports_info = gf_strdup (buffer);
+
+out:
+        if (proc_fd != -1)
+                close (proc_fd);
+#endif /* GF_LINUX_HOST_OS */
+        return ports_info;
+}
+
+/*
+ * Mark every reserved port reported by gf_get_reserved_ports() in the
+ * ports[] table by feeding each ",\n"-separated entry to
+ * gf_ports_reserved().
+ *
+ * Returns 0 when the list was obtained and processed, -1 when it could not
+ * be obtained or when not built for Linux.
+ */
+int
+gf_process_reserved_ports (gf_boolean_t *ports)
+{
+        int   ret      = -1;
+#if defined GF_LINUX_HOST_OS
+        char *reserved = NULL;
+        char *saveptr  = NULL;
+        char *entry    = NULL;
+
+        reserved = gf_get_reserved_ports ();
+        if (reserved == NULL) {
+                gf_log ("glusterfs", GF_LOG_WARNING, "Not able to get reserved "
+                        "ports, hence there is a possibility that glusterfs "
+                        "may consume reserved port");
+                goto out;
+        }
+
+        for (entry = strtok_r (reserved, ",\n", &saveptr);
+             entry != NULL;
+             entry = strtok_r (NULL, ",\n", &saveptr)) {
+                gf_ports_reserved (entry, ports);
+        }
+
+        ret = 0;
+
+out:
+        GF_FREE (reserved);
+#endif /* GF_LINUX_HOST_OS */
+        return ret;
+}
+
+/* Clamp a parsed port number into [0, GF_CLIENT_PORT_CEILING - 1]. */
+static int32_t
+gf_clamp_client_port (int32_t port)
+{
+        if (port > (GF_CLIENT_PORT_CEILING - 1))
+                return GF_CLIENT_PORT_CEILING - 1;
+        if (port < 0)
+                return 0;
+        return port;
+}
+
+/*
+ * Mark the port(s) described by blocked_port as reserved in ports[].
+ *
+ * blocked_port is either a single port ("N") or a range ("LOW-HIGH"); a
+ * trailing newline is stripped. The string is modified in place.
+ * A single port must lie in [0, GF_CLIENT_PORT_CEILING - 1]; the bounds of
+ * a range are clamped into that interval before the range is marked.
+ *
+ * Ports are parsed as 32-bit values so that entries above 32767 are
+ * recognised (and rejected/clamped) instead of failing to parse.
+ *
+ * Returns _gf_false when the entry was applied, _gf_true when it was
+ * malformed or out of range (nothing is marked in that case).
+ */
+gf_boolean_t
+gf_ports_reserved (char *blocked_port, gf_boolean_t *ports)
+{
+        gf_boolean_t  result   = _gf_false;
+        char         *low_str  = NULL;
+        char         *high_str = NULL;
+        char         *saveptr  = NULL;
+        size_t        len      = 0;
+        int32_t       low      = -1;
+        int32_t       high     = -1;
+        int32_t       port     = 0;
+
+        if (strstr (blocked_port, "-") == NULL) {
+                /* get rid of the new line character*/
+                len = strlen (blocked_port);
+                if (len > 0 && blocked_port[len - 1] == '\n')
+                        blocked_port[len - 1] = '\0';
+
+                if (gf_string2int32 (blocked_port, &low) != 0) {
+                        gf_log ("glusterfs-socket", GF_LOG_WARNING, "%s is "
+                                "not a valid port identifier", blocked_port);
+                        result = _gf_true;
+                        goto out;
+                }
+
+                if (low > (GF_CLIENT_PORT_CEILING - 1) || low < 0) {
+                        gf_log ("glusterfs-socket", GF_LOG_WARNING,
+                                "invalid port %d", low);
+                        result = _gf_true;
+                        goto out;
+                }
+
+                gf_log ("glusterfs", GF_LOG_DEBUG,
+                        "blocking port %d", low);
+                ports[low] = _gf_true;
+                goto out;
+        }
+
+        /* strtok_r keeps this function reentrant */
+        low_str = strtok_r (blocked_port, "-", &saveptr);
+        if (!low_str) {
+                result = _gf_true;
+                goto out;
+        }
+
+        high_str = strtok_r (NULL, "-", &saveptr);
+        if (!high_str) {
+                result = _gf_true;
+                goto out;
+        }
+
+        /* get rid of the new line character*/
+        len = strlen (high_str);
+        if (len > 0 && high_str[len - 1] == '\n')
+                high_str[len - 1] = '\0';
+
+        /* Refuse the whole range if either bound does not parse; otherwise
+         * an unset bound would be used as an index into ports[]. */
+        if (gf_string2int32 (low_str, &low) != 0 ||
+            gf_string2int32 (high_str, &high) != 0) {
+                gf_log ("glusterfs-socket", GF_LOG_WARNING,
+                        "%s-%s is not a valid port range", low_str, high_str);
+                result = _gf_true;
+                goto out;
+        }
+
+        low  = gf_clamp_client_port (low);
+        high = gf_clamp_client_port (high);
+
+        gf_log ("glusterfs", GF_LOG_DEBUG, "lower: %d, higher: %d",
+                low, high);
+        for (port = low; port <= high; port++)
+                ports[port] = _gf_true;
+
+out:
+        return result;
+}
+
+/* Takes in client ip{v4,v6} (optionally in "a.b.c.d:port" form) and returns
+ * the associated hostname, if any, via a reverse lookup.
+ * On success *hostname is a gf_strdup()ed string.
+ * Returns: 0 for success, -1 for failure (unparsable address, failed
+ * lookup or failed allocation)
+ */
+int
+gf_get_hostname_from_ip (char *client_ip, char **hostname)
+{
+        int                      ret = -1;
+        struct sockaddr         *client_sockaddr = NULL;
+        socklen_t                client_socklen = 0;
+        struct sockaddr_in       client_sock_in = {0};
+        struct sockaddr_in6      client_sock_in6 = {0};
+        char                     client_hostname[NI_MAXHOST] = {0};
+        char                    *client_ip_copy = NULL;
+        char                    *tmp = NULL;
+        char                    *ip = NULL;
+
+        /* if ipv4, reverse lookup the hostname to
+         * allow FQDN based rpc authentication
+         */
+        if (valid_ipv4_address (client_ip, strlen (client_ip), 0) == _gf_false) {
+                /* most times, we get a.b.c.d:port form, so check that */
+                client_ip_copy = gf_strdup (client_ip);
+                if (!client_ip_copy)
+                        goto out;
+
+                ip = strtok_r (client_ip_copy, ":", &tmp);
+                /* strtok_r yields NULL for "" or a string made only of ':' */
+                if (!ip)
+                        goto out;
+        } else {
+                ip = client_ip;
+        }
+
+        if (valid_ipv4_address (ip, strlen (ip), 0) == _gf_true) {
+                client_sockaddr = (struct sockaddr *)&client_sock_in;
+                client_socklen = sizeof (client_sock_in);
+                client_sock_in.sin_family = AF_INET;
+                ret = inet_pton (AF_INET, ip,
+                                 (void *)&client_sock_in.sin_addr.s_addr);
+
+        } else if (valid_ipv6_address (ip, strlen (ip), 0) == _gf_true) {
+                client_sockaddr = (struct sockaddr *) &client_sock_in6;
+                client_socklen = sizeof (client_sock_in6);
+
+                client_sock_in6.sin6_family = AF_INET6;
+                ret = inet_pton (AF_INET6, ip,
+                                 (void *)&client_sock_in6.sin6_addr);
+        } else {
+                goto out;
+        }
+
+        if (ret != 1) {
+                ret = -1;
+                goto out;
+        }
+
+        /* Pass the size of the concrete sockaddr_in/sockaddr_in6:
+         * sizeof (struct sockaddr) is too small for an IPv6 address. */
+        ret = getnameinfo (client_sockaddr,
+                           client_socklen,
+                           client_hostname, sizeof (client_hostname),
+                           NULL, 0, 0);
+        if (ret) {
+                gf_log ("common-utils", GF_LOG_ERROR,
+                        "Could not lookup hostname of %s : %s",
+                        client_ip, gai_strerror (ret));
+                ret = -1;
+                goto out;
+        }
+
+        *hostname = gf_strdup ((char *)client_hostname);
+        if (!*hostname)
+                ret = -1;
+ out:
+        GF_FREE (client_ip_copy);
+
+        return ret;
+}
+
+/*
+ * Check whether ip (a numeric IPv4/IPv6 address string) is configured on
+ * one of the local network interfaces reported by getifaddrs().
+ *
+ * Returns _gf_true when a matching interface address is found, _gf_false
+ * otherwise (including a NULL ip or a getifaddrs()/getnameinfo() failure).
+ */
+gf_boolean_t
+gf_interface_search (char *ip)
+{
+        int32_t         ret = -1;
+        gf_boolean_t    found = _gf_false;
+        struct ifaddrs *ifaddr = NULL;
+        struct ifaddrs *ifa = NULL;
+        int             family;
+        char            host[NI_MAXHOST];
+        xlator_t        *this = NULL;
+        char            *pct = NULL;
+
+        this = THIS;
+
+        if (!ip)
+                goto out;
+
+        ret = getifaddrs (&ifaddr);
+
+        if (ret != 0) {
+                /* getifaddrs() reports its error through errno, not through
+                 * a getaddrinfo-style EAI_* code. */
+                gf_log (this->name, GF_LOG_ERROR, "getifaddrs() failed: %s\n",
+                        strerror (errno));
+                ifaddr = NULL;
+                goto out;
+        }
+
+        for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
+                if (!ifa->ifa_addr) {
+                        /*
+                         * This seemingly happens if an interface hasn't
+                         * been bound to a particular protocol (seen with
+                         * TUN devices).
+                         */
+                        continue;
+                }
+                family = ifa->ifa_addr->sa_family;
+
+                if (family != AF_INET && family != AF_INET6)
+                        continue;
+
+                ret = getnameinfo (ifa->ifa_addr,
+                        (family == AF_INET) ? sizeof(struct sockaddr_in) :
+                                              sizeof(struct sockaddr_in6),
+                        host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+
+                if (ret != 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "getnameinfo() failed: %s\n",
+                                gai_strerror(ret));
+                        goto out;
+                }
+
+                /*
+                 * Sometimes the address comes back as addr%eth0 or
+                 * similar.  Since % is an invalid character, we can
+                 * strip it out with confidence that doing so won't
+                 * harm anything.
+                 */
+                pct = strchr (host, '%');
+                if (pct) {
+                        *pct = '\0';
+                }
+
+                if (strncmp (ip, host, NI_MAXHOST) == 0) {
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "%s is local address at interface %s",
+                                ip, ifa->ifa_name);
+                        found = _gf_true;
+                        goto out;
+                }
+        }
+out:
+        if (ifaddr)
+                freeifaddrs (ifaddr);
+        return found;
+}
+
+/*
+ * Render the IPv4/IPv6 address held in addr as text.
+ *
+ * On success a strdup()ed string is stored in *ip and also returned.
+ * For any other address family, or when inet_ntop() fails, an error is
+ * logged, *ip is left untouched and NULL is returned. NULL is also returned
+ * (and stored in *ip) when strdup() fails.
+ */
+char *
+get_ip_from_addrinfo (struct addrinfo *addr, char **ip)
+{
+        char  text[64];
+        void *raw = NULL;
+
+        if (addr->ai_family == AF_INET) {
+                raw = &((struct sockaddr_in *)addr->ai_addr)->sin_addr;
+        } else if (addr->ai_family == AF_INET6) {
+                raw = &((struct sockaddr_in6 *)addr->ai_addr)->sin6_addr;
+        } else {
+                gf_log ("glusterd", GF_LOG_ERROR, "Invalid family");
+                return NULL;
+        }
+
+        if (inet_ntop (addr->ai_family, raw, text, sizeof (text)) == NULL) {
+                gf_log ("glusterd", GF_LOG_ERROR, "String conversion failed");
+                return NULL;
+        }
+
+        *ip = strdup (text);
+        return *ip;
+}
+
+/*
+ * Tell whether sa is a loopback address: an IPv4 address whose first octet
+ * is 127, or the IPv6 loopback address (IN6ADDR_LOOPBACK_INIT).
+ *
+ * For any other address family an error mentioning hostname is logged
+ * (only when hostname is non-NULL) and _gf_false is returned.
+ */
+gf_boolean_t
+gf_is_loopback_localhost (const struct sockaddr *sa, char *hostname)
+{
+        GF_ASSERT (sa);
+
+        struct in6_addr        loopback6 = IN6ADDR_LOOPBACK_INIT;
+        const struct in6_addr *v6        = NULL;
+        const uint8_t         *octets    = NULL;
+
+        if (sa->sa_family == AF_INET) {
+                /* s_addr is in network byte order: byte 0 is the first octet */
+                octets = (const uint8_t *)
+                         &((const struct sockaddr_in *)sa)->sin_addr.s_addr;
+                return (octets[0] == 127) ? _gf_true : _gf_false;
+        }
+
+        if (sa->sa_family == AF_INET6) {
+                v6 = &((const struct sockaddr_in6 *)sa)->sin6_addr;
+                return (memcmp (v6, &loopback6, sizeof (loopback6)) == 0)
+                        ? _gf_true : _gf_false;
+        }
+
+        if (hostname)
+                gf_log ("glusterd", GF_LOG_ERROR,
+                        "unknown address family %d for %s",
+                        sa->sa_family, hostname);
+
+        return _gf_false;
+}
+
+/*
+ * Resolve hostname with getaddrinfo() and report whether any of the
+ * resulting addresses is a loopback address or is configured on a local
+ * interface.
+ *
+ * Returns _gf_true on the first local address found, _gf_false otherwise
+ * (including resolution failure).
+ */
+gf_boolean_t
+gf_is_local_addr (char *hostname)
+{
+        int32_t         ret = -1;
+        struct addrinfo *result = NULL;
+        struct addrinfo *res = NULL;
+        gf_boolean_t    found = _gf_false;
+        char            *ip = NULL;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+        ret = getaddrinfo (hostname, NULL, NULL, &result);
+
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR, "error in getaddrinfo: %s\n",
+                        gai_strerror(ret));
+                goto out;
+        }
+
+        for (res = result; res != NULL; res = res->ai_next) {
+                /* get_ip_from_addrinfo() leaves ip untouched on failure */
+                ip = NULL;
+                get_ip_from_addrinfo (res, &ip);
+
+                gf_log (this->name, GF_LOG_DEBUG, "%s ",
+                        ip ? ip : "(unknown)");
+
+                /* the interface search needs the textual address; skip it
+                 * when the conversion failed */
+                found = gf_is_loopback_localhost (res->ai_addr, hostname)
+                        || (ip && gf_interface_search (ip));
+
+                /* ip was allocated with strdup() by get_ip_from_addrinfo();
+                 * release it every round instead of leaking it */
+                free (ip);
+                ip = NULL;
+
+                if (found)
+                        goto out;
+        }
+
+out:
+        if (result)
+                freeaddrinfo (result);
+
+        if (!found)
+                gf_log (this->name, GF_LOG_DEBUG, "%s is not local", hostname);
+
+        return found;
+}
+
+/*
+ * Resolve name1 and name2 with getaddrinfo() and report whether any
+ * resolved socket address of the first is byte-identical (same length and
+ * same contents) to any resolved socket address of the second.
+ *
+ * Returns _gf_true on the first match, _gf_false when there is none or
+ * when either name fails to resolve.
+ */
+gf_boolean_t
+gf_is_same_address (char *name1, char *name2)
+{
+        struct addrinfo *list1 = NULL;
+        struct addrinfo *list2 = NULL;
+        struct addrinfo *lhs   = NULL;
+        struct addrinfo *rhs   = NULL;
+        gf_boolean_t     same  = _gf_false;
+        int              err   = 0;
+
+        err = getaddrinfo(name1,NULL,NULL,&list1);
+        if (err != 0) {
+                gf_log (name1, GF_LOG_WARNING,
+                        "error in getaddrinfo: %s\n", gai_strerror(err));
+                goto out;
+        }
+
+        err = getaddrinfo(name2,NULL,NULL,&list2);
+        if (err != 0) {
+                gf_log (name2, GF_LOG_WARNING,
+                        "error in getaddrinfo: %s\n", gai_strerror(err));
+                goto out;
+        }
+
+        for (lhs = list1; lhs && !same; lhs = lhs->ai_next) {
+                for (rhs = list2; rhs; rhs = rhs->ai_next) {
+                        if (lhs->ai_addrlen == rhs->ai_addrlen &&
+                            memcmp(lhs->ai_addr,rhs->ai_addr,
+                                   lhs->ai_addrlen) == 0) {
+                                same = _gf_true;
+                                break;
+                        }
+                }
+        }
+
+out:
+        if (list1) {
+                freeaddrinfo(list1);
+        }
+        if (list2) {
+                freeaddrinfo(list2);
+        }
+        return same;
+
+}
+
+
+/* Copy path into dst (capacity dst_size): one leading '/' is dropped and
+ * every remaining '/' becomes '-'. The result is truncated to fit and is
+ * always NUL-terminated. */
+static void
+gf_flatten_path_for_log (const char *path, char *dst, size_t dst_size)
+{
+        size_t i = 0;
+        size_t j = 0;
+
+        if (dst_size == 0)
+                return;
+
+        if (path[0] == '/')
+                i = 1;
+
+        for (; path[i] != '\0' && j < dst_size - 1; i++, j++)
+                dst[j] = (path[i] == '/') ? '-' : path[i];
+
+        dst[j] = '\0';
+}
+
+/* Sets log file path from user provided arguments.
+ *
+ * The name is derived from, in order of precedence: the mount point, the
+ * volfile path (both flattened by turning '/' into '-'), or the volfile
+ * server, volfile id and pid. The result is stored in cmd_args->log_file
+ * via gf_asprintf().
+ *
+ * Over-long mount point/volfile paths are truncated to fit the local
+ * 1024-byte buffer instead of overrunning it.
+ *
+ * Returns 0 when a path was set or there was nothing to do; otherwise the
+ * non-positive value returned by gf_asprintf().
+ */
+int
+gf_set_log_file_path (cmd_args_t *cmd_args)
+{
+        int         ret           = 0;
+        const char *path          = NULL;
+        char        tmp_str[1024] = {0,};
+
+        if (!cmd_args)
+                goto done;
+
+        /* the mount point takes precedence over the volfile path */
+        if (cmd_args->mount_point)
+                path = cmd_args->mount_point;
+        else if (cmd_args->volfile)
+                path = cmd_args->volfile;
+
+        if (path) {
+                gf_flatten_path_for_log (path, tmp_str, sizeof (tmp_str));
+
+                ret = gf_asprintf (&cmd_args->log_file,
+                                   DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
+                                   tmp_str);
+                if (ret > 0)
+                        ret = 0;
+                goto done;
+        }
+
+        if (cmd_args->volfile_server) {
+
+                ret = gf_asprintf (&cmd_args->log_file,
+                                   DEFAULT_LOG_FILE_DIRECTORY "/%s-%s-%d.log",
+                                   cmd_args->volfile_server,
+                                   cmd_args->volfile_id, getpid());
+                if (ret > 0)
+                        ret = 0;
+        }
+done:
+        return ret;
+}
+
+/*
+ * pthread_create() wrapper that, for the duration of the call, blocks every
+ * signal in the calling thread except SIGSEGV, SIGBUS, SIGILL, SIGSYS,
+ * SIGFPE and SIGABRT, then restores the caller's previous signal mask.
+ * (Per POSIX a new thread inherits its creator's signal mask.)
+ *
+ * Returns whatever pthread_create() returns.
+ */
+int
+gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
+                  void *(*start_routine)(void *), void *arg)
+{
+        static const int kept[] = { SIGSEGV, SIGBUS, SIGILL,
+                                    SIGSYS, SIGFPE, SIGABRT };
+        sigset_t         blocked;
+        sigset_t         saved;
+        size_t           i   = 0;
+        int              ret = 0;
+
+        sigemptyset (&blocked);
+
+        sigfillset (&blocked);
+        for (i = 0; i < sizeof (kept) / sizeof (kept[0]); i++)
+                sigdelset (&blocked, kept[i]);
+
+        pthread_sigmask (SIG_BLOCK, &blocked, &saved);
+
+        ret = pthread_create (thread, attr, start_routine, arg);
+
+        pthread_sigmask (SIG_SETMASK, &saved, NULL);
+
+        return ret;
+}
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
index 9903edad4..3c99a4212 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/common-utils.h
@@ -23,6 +23,7 @@
#include <string.h>
#include <assert.h>
#include <pthread.h>
+#include <openssl/md5.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
#endif
@@ -65,6 +66,8 @@ void trap (void);
#define GEOREP "geo-replication"
#define GHADOOP "glusterfs-hadoop"
+#define GF_SELINUX_XATTR_KEY "security.selinux"
+
#define WIPE(statp) do { typeof(*statp) z = {0,}; if (statp) *statp = z; } while (0)
#define IS_EXT_FS(fs_name) \
@@ -75,7 +78,8 @@ void trap (void);
/* Defining this here as it is needed by glusterd for setting
* nfs port in volume status.
*/
-#define GF_NFS3_PORT 38467
+#define GF_NFS3_PORT 2049
+#define GF_CLIENT_PORT_CEILING 1024
enum _gf_boolean
{
@@ -106,15 +110,14 @@ void gf_global_variable_init(void);
in_addr_t gf_resolve_ip (const char *hostname, void **dnscache);
-void gf_log_volume_file (FILE *specfp);
-void gf_print_trace (int32_t signal);
-
-extern char *gf_fop_list[GF_FOP_MAXVALUE];
-extern char *gf_mgmt_list[GF_MGMT_MAXVALUE];
+void gf_log_dump_graph (FILE *specfp, glusterfs_graph_t *graph);
+void gf_print_trace (int32_t signal, glusterfs_ctx_t *ctx);
+int gf_set_log_file_path (cmd_args_t *cmd_args);
#define VECTORSIZE(count) (count * (sizeof (struct iovec)))
#define STRLEN_0(str) (strlen(str) + 1)
+
#define VALIDATE_OR_GOTO(arg,label) do { \
if (!arg) { \
errno = EINVAL; \
@@ -173,25 +176,22 @@ extern char *gf_mgmt_list[GF_MGMT_MAXVALUE];
} \
} while (0)
-
+/* Jump to label when dict is NULL, or (with op_errno set to EPERM) when
+ * dict_foreach_fnmatch() reports at least one key of dict matching pattern.
+ * Expects a variable named `this` in the expansion scope (used for logging).
+ * Note that the log message prints the pattern, not the offending key. */
-#define GF_IF_INTERNAL_XATTR_GOTO(pattern, dict, trav, op_errno, label) \
+#define GF_IF_INTERNAL_XATTR_GOTO(pattern, dict, op_errno, label) \
do { \
if (!dict) { \
gf_log (this->name, GF_LOG_ERROR, \
"setxattr dict is null"); \
goto label; \
} \
- trav = dict->members_list; \
- while (trav) { \
- if (!fnmatch (pattern, trav->key, 0)) { \
- op_errno = EPERM; \
- gf_log (this->name, GF_LOG_ERROR, \
- "attempt to set internal" \
- " xattr: %s: %s", trav->key, \
- strerror (op_errno)); \
- goto label; \
- } \
- trav = trav->next; \
+ if (dict_foreach_fnmatch (dict, pattern, \
+ dict_null_foreach_fn, \
+ NULL) > 0) { \
+ op_errno = EPERM; \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "attempt to set internal" \
+ " xattr: %s: %s", pattern, \
+ strerror (op_errno)); \
+ goto label; \
} \
} while (0)
@@ -267,7 +267,7 @@ iov_length (const struct iovec *vector, int count)
static inline struct iovec *
-iov_dup (struct iovec *vector, int count)
+iov_dup (const struct iovec *vector, int count)
{
int bytecount = 0;
int i;
@@ -347,6 +347,65 @@ iov_unload (char *buf, const struct iovec *vector, int count)
}
+/*
+ * Scatter the first size bytes of buf into the count buffers of vector, in
+ * order. A chunk is not copied when the destination already is the
+ * corresponding position inside buf.
+ *
+ * Returns the number of bytes accounted for (at most size).
+ */
+static inline size_t
+iov_load (const struct iovec *vector, int count, char *buf, int size)
+{
+        size_t  remaining = size;
+        size_t  chunk     = 0;
+        int     copied    = 0;
+        int     idx       = 0;
+        char   *src       = NULL;
+
+        while (remaining && idx < count) {
+                src   = buf + (size - remaining);
+                chunk = min (vector[idx].iov_len, remaining);
+
+                if (vector[idx].iov_base != src)
+                        memcpy (vector[idx].iov_base, src, chunk);
+
+                copied    += chunk;
+                remaining -= chunk;
+
+                /* only advance while there is still data to place */
+                if (remaining)
+                        idx++;
+        }
+
+        return copied;
+}
+
+
+/*
+ * Copy bytes from the scnt buffers of src into the dcnt buffers of dst,
+ * walking both vectors in order, until the shorter of the two total
+ * lengths (as computed by iov_length()) has been copied.
+ *
+ * Returns the number of bytes copied.
+ */
+static inline size_t
+iov_copy (const struct iovec *dst, int dcnt,
+          const struct iovec *src, int scnt)
+{
+        size_t  total     = 0;
+        size_t  remaining = 0;
+        size_t  chunk     = 0;
+        int     src_idx   = 0;
+        int     src_off   = 0;
+        int     dst_idx   = 0;
+        int     dst_off   = 0;
+
+        total = min (iov_length (dst, dcnt), iov_length (src, scnt));
+
+        for (remaining = total; remaining; remaining -= chunk) {
+                /* largest run that fits in both current buffers */
+                chunk = min (dst[dst_idx].iov_len - dst_off,
+                             src[src_idx].iov_len - src_off);
+                memcpy (dst[dst_idx].iov_base + dst_off,
+                        src[src_idx].iov_base + src_off,
+                        chunk);
+
+                dst_off += chunk;
+                if (dst_off == dst[dst_idx].iov_len) {
+                        dst_off = 0;
+                        dst_idx++;
+                }
+
+                src_off += chunk;
+                if (src_off == src[src_idx].iov_len) {
+                        src_off = 0;
+                        src_idx++;
+                }
+        }
+
+        return total;
+}
+
+
static inline int
mem_0filled (const char *buf, size_t size)
{
@@ -392,6 +451,34 @@ memdup (const void *ptr, size_t size)
return newptr;
}
+/* Selector for gf_time_fmt(): each value below gf_timefmt_last is used as an
+ * index into the format table obtained from _gf_timestuff(). */
+typedef enum {
+ gf_timefmt_default = 0,
+ gf_timefmt_FT = 0, /* YYYY-MM-DD hh:mm:ss ("%F %T") */
+ gf_timefmt_Ymd_T, /* YYYY/MM/DD-hh:mm:ss ("%Y/%m/%d-%T") */
+ gf_timefmt_bdT, /* Mon DD hh:mm:ss ("%b %d %T", abbreviated month name) */
+ gf_timefmt_F_HMS, /* YYYY-MM-DD hhmmss ("%F %H%M%S") */
+ gf_timefmt_last /* number of formats; not a valid selector itself */
+} gf_timefmts;
+
+/*
+ * Format utime (seconds since the epoch, rendered in UTC via gmtime_r) into
+ * dst, which has room for sz_dst bytes, using the strftime format selected
+ * by fmt (a gf_timefmts value).
+ *
+ * An fmt that is not a valid selector falls back to gf_timefmt_default.
+ * When utime is 0 or cannot be converted, "N/A" (truncated to fit, always
+ * NUL-terminated) is written instead; nothing is written if sz_dst is 0.
+ *
+ * The format tables are fetched once from _gf_timestuff() and cached in
+ * function-local statics.
+ */
+static inline void
+gf_time_fmt (char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
+{
+        extern void _gf_timestuff (gf_timefmts *, const char ***, const char ***);
+        static gf_timefmts timefmt_last = (gf_timefmts) -1;
+        static const char **fmts;
+        static const char **zeros;
+        struct tm          tm;
+
+        if (timefmt_last == -1)
+                _gf_timestuff (&timefmt_last, &fmts, &zeros);
+        /* timefmt_last is the number of table entries, so it is itself
+         * already one past the last valid index */
+        if (timefmt_last <= fmt)
+                fmt = gf_timefmt_default;
+        if (utime && gmtime_r (&utime, &tm) != NULL) {
+                strftime (dst, sz_dst, fmts[fmt], &tm);
+        } else if (sz_dst > 0) {
+                strncpy (dst, "N/A", sz_dst);
+                /* strncpy does not terminate when the source does not fit */
+                dst[sz_dst - 1] = '\0';
+        }
+}
+
int
mkdir_p (char *path, mode_t mode, gf_boolean_t allow_symlinks);
/*
@@ -399,14 +486,17 @@ mkdir_p (char *path, mode_t mode, gf_boolean_t allow_symlinks);
* nr
*/
-int32_t gf_roundup_power_of_two (uint32_t nr);
+int
+gf_lstat_dir (const char *path, struct stat *stbuf_in);
+
+int32_t gf_roundup_power_of_two (int32_t nr);
/*
* rounds up nr to next power of two. If nr is already a power of two, next
* power of two is returned.
*/
-int32_t gf_roundup_next_power_of_two (uint32_t nr);
+int32_t gf_roundup_next_power_of_two (int32_t nr);
char *gf_trim (char *string);
int gf_strsplit (const char *str, const char *delim,
@@ -444,7 +534,7 @@ int gf_string2percent_or_bytesize (const char *str, uint64_t *n,
gf_boolean_t *is_percent);
int gf_string2boolean (const char *str, gf_boolean_t *b);
-int gf_string2percent (const char *str, uint32_t *n);
+int gf_string2percent (const char *str, double *n);
int gf_string2time (const char *str, uint32_t *n);
int gf_lockfd (int fd);
@@ -469,6 +559,8 @@ char valid_internet_address (char *address, gf_boolean_t wildcard_acc);
char valid_ipv4_wildcard_check (char *address);
char valid_ipv6_wildcard_check (char *address);
char valid_wildcard_internet_address (char *address);
+gf_boolean_t gf_sock_union_equal_addr (union gf_sock_union *a,
+ union gf_sock_union *b);
char *uuid_utoa (uuid_t uuid);
char *uuid_utoa_r (uuid_t uuid, char *dst);
@@ -487,4 +579,16 @@ void gf_path_strip_trailing_slashes (char *path);
uint64_t get_mem_size ();
int gf_strip_whitespace (char *str, int len);
int gf_canonicalize_path (char *path);
+char *generate_glusterfs_ctx_id (void);
+char *gf_get_reserved_ports();
+int gf_process_reserved_ports (gf_boolean_t ports[]);
+gf_boolean_t gf_ports_reserved (char *blocked_port, gf_boolean_t *ports);
+int gf_get_hostname_from_ip (char *client_ip, char **hostname);
+gf_boolean_t gf_is_local_addr (char *hostname);
+gf_boolean_t gf_is_same_address (char *host1, char *host2);
+void md5_wrapper(const unsigned char *data, size_t len, char *md5);
+
+int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg);
+
#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/compat.c b/libglusterfs/src/compat.c
index 99a0041a5..eb6d8d4b7 100644
--- a/libglusterfs/src/compat.c
+++ b/libglusterfs/src/compat.c
@@ -142,8 +142,7 @@ done:
*path = export_path;
}
out:
- if (freeptr)
- GF_FREE (freeptr);
+ GF_FREE (freeptr);
if (ret && export_path)
GF_FREE (export_path);
@@ -195,8 +194,7 @@ solaris_xattr_resolve_path (const char *real_path, char **path)
*path = gf_strdup (xattr_path);
}
out:
- if (export_path)
- GF_FREE (export_path);
+ GF_FREE (export_path);
if (*path)
return 0;
else
@@ -230,8 +228,7 @@ solaris_setxattr(const char *path, const char* key, const char *value,
path, errno);
ret = -1;
}
- if (mapped_path)
- GF_FREE (mapped_path);
+ GF_FREE (mapped_path);
return ret;
}
@@ -297,8 +294,7 @@ solaris_listxattr(const char *path, char *list, size_t size)
close (attrdirfd);
}
out:
- if (mapped_path)
- GF_FREE (mapped_path);
+ GF_FREE (mapped_path);
return len;
}
@@ -380,8 +376,7 @@ solaris_removexattr(const char *path, const char* key)
ret = -1;
}
- if (mapped_path)
- GF_FREE (mapped_path);
+ GF_FREE (mapped_path);
return ret;
}
@@ -421,8 +416,7 @@ solaris_getxattr(const char *path,
errno = ENODATA;
ret = -1;
}
- if (mapped_path)
- GF_FREE (mapped_path);
+ GF_FREE (mapped_path);
return ret;
}
@@ -494,8 +488,7 @@ int solaris_unlink (const char *path)
}
out:
- if (mapped_path)
- GF_FREE (mapped_path);
+ GF_FREE (mapped_path);
return unlink (path);
}
@@ -550,10 +543,3 @@ strnlen(const char *string, size_t maxlen)
return len;
}
#endif /* STRNLEN */
-
-#ifdef THREAD_UNSAFE_BASENAME
-#include "../../contrib/libgen/basename_r.c"
-#endif
-#ifdef THREAD_UNSAFE_DIRNAME
-#include "../../contrib/libgen/dirname_r.c"
-#endif
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h
index a6eec2ded..2bd982541 100644
--- a/libglusterfs/src/compat.h
+++ b/libglusterfs/src/compat.h
@@ -32,6 +32,12 @@
#include <linux/limits.h>
#include <sys/xattr.h>
#include <endian.h>
+#ifdef HAVE_FALLOC_H
+#include <linux/falloc.h>
+#else
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+#endif
#ifndef HAVE_LLISTXATTR
@@ -340,10 +346,13 @@ char *basename_r(const char *);
#endif /* THREAD_UNSAFE_BASENAME */
#ifdef THREAD_UNSAFE_DIRNAME
-char *dirname_r(const char *path);
+char *dirname_r(char *path);
#define dirname(path) dirname_r(path)
#endif /* THREAD_UNSAFE_DIRNAME */
+/* mkostemp() replacement: forwards to gf_mkostemp() with a suffix length
+ * of 0. The macro deliberately has no trailing semicolon so that it can be
+ * used inside expressions, e.g. `if ((fd = mkostemp (tmpl, flags)) < 0)`. */
+int gf_mkostemp (char *tmpl, int suffixlen, int flags);
+#define mkostemp(tmpl, flags) gf_mkostemp((tmpl), 0, (flags))
+
#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
/* Linux, Solaris, Cygwin */
#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c
new file mode 100644
index 000000000..0082601d4
--- /dev/null
+++ b/libglusterfs/src/ctx.c
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif /* !_CONFIG_H */
+
+#include <pthread.h>
+
+#include "glusterfs.h"
+
+/*
+ * Allocate a zero-filled glusterfs_ctx_t, initialise its graphs and
+ * mempool_list list heads, mark both daemon_pipe ends as unset (-1) and
+ * initialise its lock.
+ *
+ * Returns the new context, or NULL when the allocation or the mutex
+ * initialisation fails.
+ */
+glusterfs_ctx_t *
+glusterfs_ctx_new ()
+{
+        glusterfs_ctx_t *new_ctx = NULL;
+
+        /* plain calloc on purpose (not GF_CALLOC): gf_acct_mem_set_enable
+           is not yet decided at this point */
+        new_ctx = calloc (1, sizeof (*new_ctx));
+        if (new_ctx == NULL)
+                return NULL;
+
+        INIT_LIST_HEAD (&new_ctx->graphs);
+        INIT_LIST_HEAD (&new_ctx->mempool_list);
+
+        new_ctx->daemon_pipe[0] = -1;
+        new_ctx->daemon_pipe[1] = -1;
+
+        if (pthread_mutex_init (&new_ctx->lock, NULL) != 0) {
+                free (new_ctx);
+                return NULL;
+        }
+
+        return new_ctx;
+}
+
diff --git a/libglusterfs/src/defaults.c b/libglusterfs/src/defaults.c
index bf4d01934..2ebb25150 100644
--- a/libglusterfs/src/defaults.c
+++ b/libglusterfs/src/defaults.c
@@ -455,6 +455,35 @@ default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int32_t
+default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{ /* default fallocate callback: passes op_ret, op_errno, pre, post, xdata on unchanged */
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, pre, post, xdata);
+ return 0; /* cookie and this are not used here */
+}
+
+int32_t
+default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{ /* default discard callback: passes op_ret, op_errno, pre, post, xdata on unchanged */
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
+ return 0; /* cookie and this are not used here */
+}
+
+int32_t
+default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{ /* default zerofill callback: passes op_ret, op_errno, pre, post, xdata on unchanged */
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre,
+ post, xdata);
+ return 0; /* cookie and this are not used here */
+}
+
+
+int32_t
default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, char *spec_data)
{
@@ -862,14 +891,45 @@ default_fsetattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
return 0;
}
+int32_t
+default_fallocate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{ /* winds fallocate to FIRST_CHILD(this) with default_fallocate_cbk as the callback */
+ STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len,
+ xdata);
+ return 0; /* always 0; the fop result is delivered through the callback */
+}
+
+int32_t
+default_discard_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{ /* winds discard to FIRST_CHILD(this) with default_discard_cbk as the callback */
+ STACK_WIND(frame, default_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len,
+ xdata);
+ return 0; /* always 0; the fop result is delivered through the callback */
+}
+
+int32_t
+default_zerofill_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{ /* winds zerofill to FIRST_CHILD(this) with default_zerofill_cbk as the callback */
+ STACK_WIND(frame, default_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+ xdata);
+ return 0; /* always 0; the fop result is delivered through the callback */
+}
+
+
/* FOPS */
int32_t
default_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
const char *name, dict_t *xdata)
{
- STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
return 0;
}
@@ -877,8 +937,9 @@ int32_t
default_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
return 0;
}
@@ -886,56 +947,57 @@ int32_t
default_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
return 0;
}
int32_t
default_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
return 0;
}
int32_t
default_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, default_fsyncdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
return 0;
}
int32_t
default_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
return 0;
}
int32_t
default_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
}
int32_t
default_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
return 0;
}
int32_t
default_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
return 0;
}
@@ -944,9 +1006,9 @@ default_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iovec *vector, int32_t count, off_t off, uint32_t flags,
struct iobref *iobref, dict_t *xdata)
{
- STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- flags, iobref, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ off, flags, iobref, xdata);
return 0;
}
@@ -954,8 +1016,9 @@ int32_t
default_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
+ flags, xdata);
return 0;
}
@@ -964,8 +1027,8 @@ int32_t
default_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
return 0;
}
@@ -973,9 +1036,9 @@ int32_t
default_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
- fd, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
return 0;
}
@@ -983,8 +1046,8 @@ int32_t
default_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
dict_t *xdata)
{
- STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
return 0;
}
@@ -992,8 +1055,9 @@ int32_t
default_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
loc_t *newloc, dict_t *xdata)
{
- STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc,
+ xdata);
return 0;
}
@@ -1002,9 +1066,9 @@ int
default_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
loc_t *loc, mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
- xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc,
+ umask, xdata);
return 0;
}
@@ -1012,8 +1076,8 @@ int32_t
default_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
dict_t *xdata)
{
- STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
return 0;
}
@@ -1021,8 +1085,8 @@ int32_t
default_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata)
{
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
return 0;
}
@@ -1030,8 +1094,9 @@ int
default_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
return 0;
}
@@ -1040,17 +1105,17 @@ int
default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev,
+ umask, xdata);
return 0;
}
int32_t
default_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
{
- STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
return 0;
}
@@ -1058,16 +1123,16 @@ default_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
int32_t
default_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata)
{
- STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
return 0;
}
int32_t
default_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
}
@@ -1075,8 +1140,8 @@ int32_t
default_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
return 0;
}
@@ -1085,8 +1150,9 @@ int32_t
default_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict,
+ xdata);
return 0;
}
@@ -1094,8 +1160,9 @@ int32_t
default_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict,
+ xdata);
return 0;
}
@@ -1103,8 +1170,9 @@ int32_t
default_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
const char *name, dict_t *xdata)
{
- STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
return 0;
}
@@ -1112,8 +1180,9 @@ int32_t
default_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
const char *name, dict_t *xdata)
{
- STACK_WIND (frame, default_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
return 0;
}
@@ -1121,8 +1190,8 @@ int32_t
default_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_WIND (frame, default_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
return 0;
}
@@ -1133,9 +1202,9 @@ default_inodelk (call_frame_t *frame, xlator_t *this,
struct gf_flock *lock,
dict_t *xdata)
{
- STACK_WIND (frame, default_inodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd,
+ lock, xdata);
return 0;
}
@@ -1144,9 +1213,9 @@ default_finodelk (call_frame_t *frame, xlator_t *this,
const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock,
dict_t *xdata)
{
- STACK_WIND (frame, default_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd,
+ lock, xdata);
return 0;
}
@@ -1156,9 +1225,9 @@ default_entrylk (call_frame_t *frame, xlator_t *this,
entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- STACK_WIND (frame, default_entrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, volume, loc,
+ basename, cmd, type, xdata);
return 0;
}
@@ -1168,9 +1237,9 @@ default_fentrylk (call_frame_t *frame, xlator_t *this,
entrylk_cmd cmd, entrylk_type type,
dict_t *xdata)
{
- STACK_WIND (frame, default_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk, volume, fd,
+ basename, cmd, type, xdata);
return 0;
}
@@ -1179,8 +1248,9 @@ default_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int32_t len,
dict_t *xdata)
{
- STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len,
+ xdata);
return 0;
}
@@ -1190,8 +1260,9 @@ default_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
size_t size, off_t off,
dict_t *xdata)
{
- STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off,
+ xdata);
return 0;
}
@@ -1200,8 +1271,9 @@ int32_t
default_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
size_t size, off_t off, dict_t *xdata)
{
- STACK_WIND (frame, default_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off,
+ xdata);
return 0;
}
@@ -1210,8 +1282,9 @@ default_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
struct iatt *stbuf, int32_t valid,
dict_t *xdata)
{
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid,
+ xdata);
return 0;
}
@@ -1219,8 +1292,8 @@ int32_t
default_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
dict_t *xdata)
{
- STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
}
@@ -1228,8 +1301,8 @@ int32_t
default_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata)
{
- STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
return 0;
}
@@ -1237,8 +1310,8 @@ int32_t
default_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata)
{
- STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
return 0;
}
@@ -1247,8 +1320,39 @@ default_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt *stbuf, int32_t valid,
dict_t *xdata)
{
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid,
+ xdata);
+ return 0;
+}
+
+int32_t
+default_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, offset_t_unused_placeholder
+
+int32_t
+default_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{ /* default discard fop: hands the call to FIRST_CHILD(this) via STACK_WIND_TAIL, passing no callback */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len,
+ xdata);
+ return 0;
+}
+
+int32_t
+default_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{ /* default zerofill fop: hands the call to FIRST_CHILD(this) via STACK_WIND_TAIL, passing no callback */
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len,
+ xdata);
return 0;
}
@@ -1287,8 +1391,8 @@ int32_t
default_getspec (call_frame_t *frame, xlator_t *this, const char *key,
int32_t flags)
{
- STACK_WIND (frame, default_getspec_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getspec, key, flags);
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getspec, key, flags);
return 0;
}
diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/defaults.h
index 8a9de7899..0747027bc 100644
--- a/libglusterfs/src/defaults.h
+++ b/libglusterfs/src/defaults.h
@@ -243,6 +243,25 @@ int32_t default_fsetattr (call_frame_t *frame,
struct iatt *stbuf,
int32_t valid, dict_t *xdata);
+int32_t default_fallocate(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_discard(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_zerofill(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+
/* Resume */
int32_t default_getspec_resume (call_frame_t *frame,
xlator_t *this,
@@ -453,6 +472,25 @@ int32_t default_fsetattr_resume (call_frame_t *frame,
struct iatt *stbuf,
int32_t valid, dict_t *xdata);
+int32_t default_fallocate_resume(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_discard_resume(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t default_zerofill_resume(call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len, dict_t *xdata);
+
+
/* _cbk */
int32_t
@@ -663,6 +701,18 @@ default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *statpre,
struct iatt *statpost, dict_t *xdata);
+int32_t default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
int32_t
default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, char *spec_data);
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index 9b0d7ff18..3b7ddce5e 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -14,6 +14,7 @@
#include <stdio.h>
#include <inttypes.h>
#include <limits.h>
+#include <fnmatch.h>
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -29,16 +30,6 @@
#include "byte-order.h"
#include "globals.h"
-data_pair_t *
-get_new_data_pair ()
-{
- data_pair_t *data_pair_ptr = NULL;
-
- data_pair_ptr = mem_get0 (THIS->ctx->dict_pair_pool);
-
- return data_pair_ptr;
-}
-
data_t *
get_new_data ()
{
@@ -63,11 +54,32 @@ get_new_dict_full (int size_hint)
}
dict->hash_size = size_hint;
- dict->members = mem_get0 (THIS->ctx->dict_pair_pool);
-
- if (!dict->members) {
- mem_put (dict);
- return NULL;
+ if (size_hint == 1) {
+ /*
+ * This is the only case we ever see currently. If we ever
+ * need to support resizing the hash table, the resize function
+ * will have to take into account the possibility that
+ * "members" is not separately allocated (i.e. don't just call
+ * realloc() blindly).
+ */
+ dict->members = &dict->members_internal;
+ }
+ else {
+ /*
+ * We actually need to allocate space for size_hint *pointers*
+ * but we actually allocate space for one *structure*. Since
+ * a data_pair_t consists of five pointers, we're wasting four
+ * pointers' worth for N=1, and will overrun what we allocated
+ * for N>5. If anybody ever starts using size_hint, we'll need
+ * to fix this.
+ */
+ GF_ASSERT (size_hint <=
+ (sizeof(data_pair_t) / sizeof(data_pair_t *)));
+ dict->members = mem_get0 (THIS->ctx->dict_pair_pool);
+ if (!dict->members) {
+ mem_put (dict);
+ return NULL;
+ }
}
LOCK_INIT (&dict->lock);
@@ -170,8 +182,7 @@ data_copy (data_t *old)
err_out:
- if (newdata->data)
- FREE (newdata->data);
+ FREE (newdata->data);
mem_put (newdata);
return NULL;
@@ -198,7 +209,7 @@ _dict_lookup (dict_t *this, char *key)
}
int32_t
-dict_lookup (dict_t *this, char *key, data_pair_t **data)
+dict_lookup (dict_t *this, char *key, data_t **data)
{
if (!this || !key || !data) {
gf_log_callingfn ("dict", GF_LOG_WARNING,
@@ -206,22 +217,22 @@ dict_lookup (dict_t *this, char *key, data_pair_t **data)
return -1;
}
+ data_pair_t *tmp = NULL;
LOCK (&this->lock);
{
- *data = _dict_lookup (this, key);
+ tmp = _dict_lookup (this, key);
}
UNLOCK (&this->lock);
- if (*data)
- return 0;
- else
+
+ if (!tmp)
return -1;
+ *data = tmp->value;
+ return 0;
}
static int32_t
-_dict_set (dict_t *this,
- char *key,
- data_t *value)
+_dict_set (dict_t *this, char *key, data_t *value, gf_boolean_t replace)
{
int hashval;
data_pair_t *pair;
@@ -240,33 +251,54 @@ _dict_set (dict_t *this,
tmp = SuperFastHash (key, strlen (key));
hashval = (tmp % this->hash_size);
- pair = _dict_lookup (this, key);
- if (pair) {
- data_t *unref_data = pair->value;
- pair->value = data_ref (value);
- data_unref (unref_data);
- if (key_free)
- GF_FREE (key);
- /* Indicates duplicate key */
- return 0;
- }
- pair = mem_get0 (THIS->ctx->dict_pair_pool);
- if (!pair) {
- return -1;
- }
+ /* Search for an existing key if 'replace' is asked for */
+ if (replace) {
+ pair = _dict_lookup (this, key);
- pair->key = (char *) GF_CALLOC (1, strlen (key) + 1,
- gf_common_mt_char);
- if (!pair->key) {
- mem_put (pair);
+ if (pair) {
+ data_t *unref_data = pair->value;
+ pair->value = data_ref (value);
+ data_unref (unref_data);
+ if (key_free)
+ GF_FREE (key);
+ /* Indicates duplicate key */
+ return 0;
+ }
+ }
- if (key_free)
- GF_FREE (key);
- return -1;
+ if (this->free_pair_in_use) {
+ pair = mem_get0 (THIS->ctx->dict_pair_pool);
+ if (!pair) {
+ if (key_free)
+ GF_FREE (key);
+ return -1;
+ }
+ }
+ else {
+ pair = &this->free_pair;
+ this->free_pair_in_use = _gf_true;
}
- strcpy (pair->key, key);
+ if (key_free) {
+ /* It's ours. Use it. */
+ pair->key = key;
+ key_free = 0;
+ }
+ else {
+ pair->key = (char *) GF_CALLOC (1, strlen (key) + 1,
+ gf_common_mt_char);
+ if (!pair->key) {
+ if (pair == &this->free_pair) {
+ this->free_pair_in_use = _gf_false;
+ }
+ else {
+ mem_put (pair);
+ }
+ return -1;
+ }
+ strcpy (pair->key, key);
+ }
pair->value = data_ref (value);
pair->hash_next = this->members[hashval];
@@ -299,7 +331,28 @@ dict_set (dict_t *this,
LOCK (&this->lock);
- ret = _dict_set (this, key, value);
+ ret = _dict_set (this, key, value, 1);
+
+ UNLOCK (&this->lock);
+
+ return ret;
+}
+
+
+int32_t
+dict_add (dict_t *this, char *key, data_t *value)
+{
+ int32_t ret;
+
+ if (!this || !value) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "!this || !value for key=%s", key);
+ return -1;
+ }
+
+ LOCK (&this->lock);
+
+ ret = _dict_set (this, key, value, 0);
UNLOCK (&this->lock);
@@ -363,7 +416,12 @@ dict_del (dict_t *this, char *key)
pair->next->prev = pair->prev;
GF_FREE (pair->key);
- mem_put (pair);
+ if (pair == &this->free_pair) {
+ this->free_pair_in_use = _gf_false;
+ }
+ else {
+ mem_put (pair);
+ }
this->count--;
break;
}
@@ -394,16 +452,18 @@ dict_destroy (dict_t *this)
pair = pair->next;
data_unref (prev->value);
GF_FREE (prev->key);
- mem_put (prev);
+ if (prev != &this->free_pair) {
+ mem_put (prev);
+ }
prev = pair;
}
- mem_put (this->members);
+ if (this->members != &this->members_internal) {
+ mem_put (this->members);
+ }
- if (this->extra_free)
- GF_FREE (this->extra_free);
- if (this->extra_stdfree)
- free (this->extra_stdfree);
+ GF_FREE (this->extra_free);
+ free (this->extra_stdfree);
if (!this->is_static)
mem_put (this);
@@ -836,7 +896,7 @@ data_to_int32 (data_t *data)
int16_t
data_to_int16 (data_t *data)
{
- int16_t value = 0;
+ int16_t value = 0;
if (!data) {
gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
@@ -850,16 +910,16 @@ data_to_int16 (data_t *data)
memcpy (str, data->data, data->len);
str[data->len] = '\0';
- errno = 0;
- value = strtol (str, NULL, 0);
+ errno = 0;
+ value = strtol (str, NULL, 0);
- if ((SHRT_MAX > value) || (SHRT_MIN < value)) {
- errno = ERANGE;
+ if ((value > SHRT_MAX) || (value < SHRT_MIN)) {
+ errno = ERANGE;
gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "detected overflow");
+ "Error in data conversion: "
+ "detected overflow");
return -1;
- }
+ }
return (int16_t)value;
}
@@ -868,7 +928,7 @@ data_to_int16 (data_t *data)
int8_t
data_to_int8 (data_t *data)
{
- int32_t value = 0;
+ int8_t value = 0;
if (!data) {
gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
@@ -882,16 +942,16 @@ data_to_int8 (data_t *data)
memcpy (str, data->data, data->len);
str[data->len] = '\0';
- errno = 0;
- value = strtol (str, NULL, 0);
+ errno = 0;
+ value = strtol (str, NULL, 0);
- if ((SCHAR_MAX > value) || (SCHAR_MIN < value)) {
- errno = ERANGE;
+ if ((value > SCHAR_MAX) || (value < SCHAR_MIN)) {
+ errno = ERANGE;
gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "detected overflow");
+ "Error in data conversion: "
+ "detected overflow");
return -1;
- }
+ }
return (int8_t)value;
}
@@ -1018,47 +1078,141 @@ data_to_bin (data_t *data)
return data->data;
}
-void
+int
+dict_null_foreach_fn (dict_t *d, char *k,
+ data_t *v, void *tmp)
+{ /* no-op callback with the signature dict_foreach expects: ignores every argument */
+ return 0;
+}
+
+int
dict_foreach (dict_t *dict,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
void *data)
{
if (!dict) {
gf_log_callingfn ("dict", GF_LOG_WARNING,
"dict is NULL");
- return;
+ return -1;
}
- data_pair_t *pairs = dict->members_list;
- data_pair_t *next = NULL;
+ int ret = -1;
+ data_pair_t *pairs = NULL;
+ data_pair_t *next = NULL;
+ pairs = dict->members_list;
while (pairs) {
next = pairs->next;
- fn (dict, pairs->key, pairs->value, data);
+ ret = fn (dict, pairs->key, pairs->value, data);
+ if (ret == -1)
+ return -1;
pairs = next;
}
+
+ return 0;
}
+/* Call fn for each pair whose key matches the fnmatch() pattern.
+ Returns -1 as soon as fn returns -1 (iteration stops there),
+ 0 if dict is NULL or no key matched,
+ +n = number of matching keys for which fn was called.
+*/
+int
+dict_foreach_fnmatch (dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
+ void *data)
+{
+ if (!dict) {
+ gf_log_callingfn ("dict", GF_LOG_WARNING,
+ "dict is NULL");
+ return 0; /* a NULL dict is reported as "no matches", not as -1 */
+ }
+
+ int ret = -1;
+ int count = 0;
+ data_pair_t *pairs = NULL;
+ data_pair_t *next = NULL;
-static void
+ pairs = dict->members_list;
+ while (pairs) {
+ next = pairs->next; /* read before fn runs; presumably so fn may remove this pair - confirm */
+ if (!fnmatch (pattern, pairs->key, 0)) { /* fnmatch() returns 0 on a match */
+ ret = fn (dict, pairs->key, pairs->value, data);
+ if (ret == -1)
+ return -1;
+ count++; /* counted only after fn did not return -1 */
+ }
+ pairs = next;
+ }
+
+ return count;
+}
+
+
+/**
+ * dict_keys_join - pack the keys of the dictionary in a buffer.
+ *
+ * @value : buffer in which the NUL-separated keys are packed (can be NULL)
+ * @size : size of @value in bytes (can be 0, in which case nothing is
+ *         packed and only the length is returned)
+ * @dict : dictionary whose keys are packed (NULL is logged and yields 0)
+ * @filter_fn : optional; keys for which filter_fn() returns non-zero are
+ *              skipped, i.e. neither packed nor counted
+ *
+ * @return : total bytes needed for all joined keys, each including its
+ *           terminating NUL. This may exceed @size; the buffer is then
+ *           truncated and its last key may lack a terminating NUL.
+ */
+int
+dict_keys_join (void *value, int size, dict_t *dict,
+                int (*filter_fn)(char *k))
+{
+        int          len   = 0;
+        char        *buf   = value; /* byte pointer: no void * arithmetic */
+        data_pair_t *pairs = NULL;
+
+        if (!dict) {
+                gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
+                return 0;
+        }
+
+        for (pairs = dict->members_list; pairs; pairs = pairs->next) {
+                if (filter_fn && filter_fn (pairs->key))
+                        continue;
+
+                /* copy only while there is room; len keeps growing so
+                   the caller learns the size it really needs */
+                if (buf && (size > len))
+                        strncpy (buf + len, pairs->key, size - len);
+                len += (strlen (pairs->key) + 1);
+        }
+
+        return len;
+}
+
+static int
_copy (dict_t *unused,
char *key,
data_t *value,
void *newdict)
{
- dict_set ((dict_t *)newdict, key, (value));
+ return dict_set ((dict_t *)newdict, key, (value));
}
-static void
+static int
_remove (dict_t *dict,
char *key,
data_t *value,
void *unused)
{
dict_del ((dict_t *)dict, key);
+ return 0;
}
@@ -2422,6 +2576,7 @@ dict_unserialize (char *orig_buf, int32_t size, dict_t **fill)
"available (%lu) < required (%lu)",
(long)(orig_buf + size),
(long)(buf + vallen));
+ goto out;
}
value = get_new_data ();
value->len = vallen;
@@ -2429,7 +2584,7 @@ dict_unserialize (char *orig_buf, int32_t size, dict_t **fill)
value->is_static = 0;
buf += vallen;
- dict_set (*fill, key, value);
+ dict_add (*fill, key, value);
}
ret = 0;
@@ -2450,7 +2605,7 @@ out:
*/
int32_t
-dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length)
+dict_allocate_and_serialize (dict_t *this, char **buf, u_int *length)
{
int ret = -EINVAL;
ssize_t len = 0;
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
index 4e7cf2406..9b41b5a7d 100644
--- a/libglusterfs/src/dict.h
+++ b/libglusterfs/src/dict.h
@@ -29,13 +29,11 @@ typedef struct _data_pair data_pair_t;
#define GF_PROTOCOL_DICT_SERIALIZE(this,from_dict,to,len,ope,labl) do { \
int ret = 0; \
- size_t dictlen = 0; \
\
if (!from_dict) \
break; \
\
- ret = dict_allocate_and_serialize (from_dict, to, \
- &dictlen); \
+ ret = dict_allocate_and_serialize (from_dict, to, &len);\
if (ret < 0) { \
gf_log (this->name, GF_LOG_WARNING, \
"failed to get serialized dict (%s)", \
@@ -43,32 +41,25 @@ typedef struct _data_pair data_pair_t;
ope = EINVAL; \
goto labl; \
} \
- len = dictlen; \
} while (0)
#define GF_PROTOCOL_DICT_UNSERIALIZE(xl,to,buff,len,ret,ope,labl) do { \
- char *buf = NULL; \
if (!len) \
break; \
to = dict_new(); \
GF_VALIDATE_OR_GOTO (xl->name, to, labl); \
\
- buf = memdup (buff, len); \
- GF_VALIDATE_OR_GOTO (xl->name, buf, labl); \
- \
- ret = dict_unserialize (buf, len, &to); \
+ ret = dict_unserialize (buff, len, &to); \
if (ret < 0) { \
gf_log (xl->name, GF_LOG_WARNING, \
"failed to unserialize dictionary (%s)", \
(#to)); \
\
ope = EINVAL; \
- GF_FREE (buf); \
goto labl; \
} \
\
- to->extra_free = buf; \
} while (0)
struct _data {
@@ -99,13 +90,20 @@ struct _dict {
char *extra_free;
char *extra_stdfree;
gf_lock_t lock;
+ data_pair_t *members_internal;
+ data_pair_t free_pair;
+ gf_boolean_t free_pair_in_use;
};
int32_t is_data_equal (data_t *one, data_t *two);
void data_destroy (data_t *data);
+/* function to set a key/value pair (overwrites the existing value if the key matches) */
int32_t dict_set (dict_t *this, char *key, data_t *value);
+/* function to set a new key/value pair (without checking for duplicate) */
+int32_t dict_add (dict_t *this, char *key, data_t *value);
+
data_t *dict_get (dict_t *this, char *key);
void dict_del (dict_t *this, char *key);
int dict_reset (dict_t *dict);
@@ -114,7 +112,7 @@ int32_t dict_serialized_length (dict_t *dict);
int32_t dict_serialize (dict_t *dict, char *buf);
int32_t dict_unserialize (char *buf, int32_t size, dict_t **fill);
-int32_t dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length);
+int32_t dict_allocate_and_serialize (dict_t *this, char **buf, u_int *length);
void dict_destroy (dict_t *dict);
void dict_unref (dict_t *dict);
@@ -122,7 +120,7 @@ dict_t *dict_ref (dict_t *dict);
data_t *data_ref (data_t *data);
void data_unref (data_t *data);
-int32_t dict_lookup (dict_t *this, char *key, data_pair_t **data);
+int32_t dict_lookup (dict_t *this, char *key, data_t **data);
/*
TODO: provide converts for differnt byte sizes, signedness, and void *
*/
@@ -165,16 +163,26 @@ data_t * data_copy (data_t *old);
dict_t *get_new_dict_full (int size_hint);
dict_t *get_new_dict ();
-data_pair_t *get_new_data_pair ();
+int dict_foreach (dict_t *this,
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
+ void *data);
-void dict_foreach (dict_t *this,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
- void *data);
+int dict_foreach_fnmatch (dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this,
+ char *key,
+ data_t *value,
+ void *data),
+ void *data);
+
+int dict_null_foreach_fn (dict_t *d, char *k,
+ data_t *v, void *tmp);
dict_t *dict_copy (dict_t *this, dict_t *new);
+int dict_keys_join (void *value, int size, dict_t *dict,
+ int (*filter_fn)(char *key));
/* CLEANED UP FUNCTIONS DECLARATIONS */
GF_MUST_CHECK dict_t *dict_new (void);
@@ -226,5 +234,4 @@ GF_MUST_CHECK int dict_serialize_value_with_delim (dict_t *this, char *buf, int3
char delimiter);
void dict_dump (dict_t *dict);
-
#endif
diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
new file mode 100644
index 000000000..06b323624
--- /dev/null
+++ b/libglusterfs/src/event-epoll.c
@@ -0,0 +1,463 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/poll.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "logging.h"
+#include "event.h"
+#include "mem-pool.h"
+#include "common-utils.h"
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef HAVE_SYS_EPOLL_H
+#include <sys/epoll.h>
+
+
+/*
+ * Find the registry slot that currently holds 'fd'.
+ *
+ * 'idx' is only a hint: when it is inside [0, used) and that slot holds
+ * 'fd' it is returned directly, otherwise the used part of the registry
+ * is scanned from the start.
+ *
+ * Returns the slot index, or -1 when event_pool is NULL or no used slot
+ * holds 'fd'. Every call site in this file invokes it with
+ * event_pool->mutex held.
+ */
+static int
+__event_getindex (struct event_pool *event_pool, int fd, int idx)
+{
+        int  found = -1;
+        int  slot = 0;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        /* fast path: the hint is still accurate */
+        if (idx >= 0 && idx < event_pool->used &&
+            event_pool->reg[idx].fd == fd) {
+                found = idx;
+                goto out;
+        }
+
+        /* slow path: linear search over the slots in use */
+        for (slot = 0; slot < event_pool->used; slot++) {
+                if (event_pool->reg[slot].fd == fd) {
+                        found = slot;
+                        break;
+                }
+        }
+
+out:
+        return found;
+}
+
+
+/*
+ * Allocate an event pool backed by an epoll descriptor.
+ *
+ * 'count' is the initial number of registry slots and is also passed to
+ * epoll_create(). Returns the new pool, or NULL when either allocation
+ * or epoll_create() fails (everything allocated so far is released).
+ */
+static struct event_pool *
+event_pool_new_epoll (int count)
+{
+        struct event_pool *pool = NULL;
+        int                epfd = -1;
+
+        pool = GF_CALLOC (1, sizeof (*pool), gf_common_mt_event_pool);
+        if (!pool)
+                return NULL;
+
+        pool->count = count;
+        pool->reg = GF_CALLOC (pool->count, sizeof (*pool->reg),
+                               gf_common_mt_reg);
+        if (!pool->reg)
+                goto free_pool;
+
+        epfd = epoll_create (count);
+        if (epfd == -1) {
+                gf_log ("epoll", GF_LOG_ERROR, "epoll fd creation failed (%s)",
+                        strerror (errno));
+                goto free_reg;
+        }
+
+        pool->fd = epfd;
+
+        pthread_mutex_init (&pool->mutex, NULL);
+        pthread_cond_init (&pool->cond, NULL);
+
+        return pool;
+
+free_reg:
+        GF_FREE (pool->reg);
+free_pool:
+        GF_FREE (pool);
+        return NULL;
+}
+
+
+/*
+ * Register 'fd' with the epoll-backed pool.
+ *
+ * handler/data are stored in the registry slot and handed back by the
+ * dispatcher. poll_in / poll_out select EPOLLIN / EPOLLOUT interest:
+ * 1 enables, 0 disables, -1 leaves the default; any other value is
+ * logged and ignored. EPOLLPRI is always requested.
+ *
+ * Returns the result of epoll_ctl(EPOLL_CTL_ADD) (0 on success), or -1
+ * when event_pool is NULL, the registry cannot grow, or epoll_ctl fails.
+ * NOTE(review): unlike event_register_poll this does not return the slot
+ * index -- confirm callers do not rely on the value as an index hint.
+ *
+ * On any failure the pool is left exactly as it was: the registry
+ * pointer and capacity are only replaced after a successful
+ * re-allocation, and the slot is only claimed (used++) once the kernel
+ * has accepted the descriptor.
+ */
+int
+event_register_epoll (struct event_pool *event_pool, int fd,
+                      event_handler_t handler,
+                      void *data, int poll_in, int poll_out)
+{
+        int                 idx = -1;
+        int                 ret = -1;
+        int                 new_count = 0;
+        void               *new_reg = NULL;
+        struct epoll_event  epoll_event = {0, };
+        struct event_data  *ev_data = (void *)&epoll_event.data;
+
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                if (event_pool->count == event_pool->used) {
+                        /* doubling zero never grows; start from one slot */
+                        new_count = (event_pool->count > 0) ?
+                                    event_pool->count * 2 : 1;
+
+                        /* keep the old array reachable if this fails */
+                        new_reg = GF_REALLOC (event_pool->reg,
+                                              new_count *
+                                              sizeof (*event_pool->reg));
+
+                        if (!new_reg) {
+                                gf_log ("epoll", GF_LOG_ERROR,
+                                        "event registry re-allocation failed");
+                                goto unlock;
+                        }
+
+                        event_pool->reg = new_reg;
+                        event_pool->count = new_count;
+                }
+
+                /* slot is filled in now but only claimed after epoll_ctl */
+                idx = event_pool->used;
+
+                event_pool->reg[idx].fd = fd;
+                event_pool->reg[idx].events = EPOLLPRI;
+                event_pool->reg[idx].handler = handler;
+                event_pool->reg[idx].data = data;
+
+                switch (poll_in) {
+                case 1:
+                        event_pool->reg[idx].events |= EPOLLIN;
+                        break;
+                case 0:
+                        event_pool->reg[idx].events &= ~EPOLLIN;
+                        break;
+                case -1:
+                        /* do nothing */
+                        break;
+                default:
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "invalid poll_in value %d", poll_in);
+                        break;
+                }
+
+                switch (poll_out) {
+                case 1:
+                        event_pool->reg[idx].events |= EPOLLOUT;
+                        break;
+                case 0:
+                        event_pool->reg[idx].events &= ~EPOLLOUT;
+                        break;
+                case -1:
+                        /* do nothing */
+                        break;
+                default:
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "invalid poll_out value %d", poll_out);
+                        break;
+                }
+
+                epoll_event.events = event_pool->reg[idx].events;
+                ev_data->fd = fd;
+                ev_data->idx = idx;
+
+                ret = epoll_ctl (event_pool->fd, EPOLL_CTL_ADD, fd,
+                                 &epoll_event);
+
+                if (ret == -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "failed to add fd(=%d) to epoll fd(=%d) (%s)",
+                                fd, event_pool->fd, strerror (errno));
+                        /* 'used' untouched: no ghost entry is left behind */
+                        goto unlock;
+                }
+
+                event_pool->used++;
+                event_pool->changed = 1;
+
+                pthread_cond_broadcast (&event_pool->cond);
+        }
+unlock:
+        pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+        return ret;
+}
+
+
+/*
+ * Remove 'fd' from the epoll set and from the registry.
+ *
+ * 'idx_hint' is passed to __event_getindex() as a lookup hint. The freed
+ * slot is back-filled with the last used slot; that descriptor is first
+ * re-armed with EPOLL_CTL_MOD so the index stored in its epoll data
+ * matches its new position.
+ *
+ * Returns 0 on success. Returns -1 when event_pool is NULL, when 'fd' is
+ * not registered (errno is set to ENOENT), or when an epoll_ctl call
+ * fails.
+ */
+static int
+event_unregister_epoll (struct event_pool *event_pool, int fd, int idx_hint)
+{
+        int                 slot = -1;
+        int                 tail = -1;
+        int                 ret = -1;
+        struct epoll_event  moved_ev = {0, };
+        struct event_data  *moved_data = (void *)&moved_ev.data;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                slot = __event_getindex (event_pool, fd, idx_hint);
+                if (slot == -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "index not found for fd=%d (idx_hint=%d)",
+                                fd, idx_hint);
+                        errno = ENOENT;
+                        goto unlock;
+                }
+
+                ret = epoll_ctl (event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
+
+                /*
+                 * The slot is invalidated whether or not the delete
+                 * worked: on failure the entry must never be used again,
+                 * on success it must stay untouched until it is reused.
+                 */
+                event_pool->reg[slot].fd = -1;
+
+                if (ret == -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "fail to del fd(=%d) from epoll fd(=%d) (%s)",
+                                fd, event_pool->fd, strerror (errno));
+                        goto unlock;
+                }
+
+                tail = event_pool->used - 1;
+                if (tail != slot) {
+                        /* tell the kernel the tail entry's new index */
+                        moved_ev.events = event_pool->reg[tail].events;
+                        moved_data->fd = event_pool->reg[tail].fd;
+                        moved_data->idx = slot;
+
+                        ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD,
+                                         moved_data->fd, &moved_ev);
+                        if (ret == -1) {
+                                gf_log ("epoll", GF_LOG_ERROR,
+                                        "fail to modify fd(=%d) index %d to %d (%s)",
+                                        moved_data->fd, event_pool->used, slot,
+                                        strerror (errno));
+                                goto unlock;
+                        }
+
+                        /* back-fill the hole with the tail entry */
+                        event_pool->reg[slot] = event_pool->reg[tail];
+                }
+
+                event_pool->used--;
+        }
+unlock:
+        pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+        return ret;
+}
+
+
+/*
+ * Change the EPOLLIN / EPOLLOUT interest of an already registered 'fd'.
+ *
+ * poll_in / poll_out: 1 enables, 0 disables, -1 leaves the bit as it
+ * is; any other value is logged and ignored. 'idx_hint' is a lookup
+ * hint for __event_getindex().
+ *
+ * Returns the result of epoll_ctl(EPOLL_CTL_MOD), or -1 when event_pool
+ * is NULL or 'fd' is not registered (errno is set to ENOENT).
+ */
+static int
+event_select_on_epoll (struct event_pool *event_pool, int fd, int idx_hint,
+                       int poll_in, int poll_out)
+{
+        int                 slot = -1;
+        int                 ret = -1;
+        struct epoll_event  new_ev = {0, };
+        struct event_data  *new_data = (void *)&new_ev.data;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                slot = __event_getindex (event_pool, fd, idx_hint);
+                if (slot == -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "index not found for fd=%d (idx_hint=%d)",
+                                fd, idx_hint);
+                        errno = ENOENT;
+                        goto unlock;
+                }
+
+                /* read interest */
+                if (poll_in == 1) {
+                        event_pool->reg[slot].events |= EPOLLIN;
+                } else if (poll_in == 0) {
+                        event_pool->reg[slot].events &= ~EPOLLIN;
+                } else if (poll_in != -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "invalid poll_in value %d", poll_in);
+                }
+
+                /* write interest */
+                if (poll_out == 1) {
+                        event_pool->reg[slot].events |= EPOLLOUT;
+                } else if (poll_out == 0) {
+                        event_pool->reg[slot].events &= ~EPOLLOUT;
+                } else if (poll_out != -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "invalid poll_out value %d", poll_out);
+                }
+
+                new_ev.events = event_pool->reg[slot].events;
+                new_data->fd = fd;
+                new_data->idx = slot;
+
+                ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, fd, &new_ev);
+                if (ret == -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "failed to modify fd(=%d) events to %d",
+                                fd, new_ev.events);
+                }
+        }
+unlock:
+        pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+        return ret;
+}
+
+
+/*
+ * Deliver the i-th entry of 'events' to its registered handler.
+ *
+ * The handler and its data are looked up under event_pool->mutex, using
+ * the fd and index carried in the epoll data; the handler itself is
+ * called after the mutex has been released.
+ *
+ * Returns the handler's return value, or -1 when the fd is no longer in
+ * the registry or the slot has no handler.
+ */
+static int
+event_dispatch_epoll_handler (struct event_pool *event_pool,
+                              struct epoll_event *events, int i)
+{
+        struct epoll_event *fired = &events[i];
+        struct event_data  *ev_data = (void *)&fired->data;
+        event_handler_t     callback = NULL;
+        void               *cbk_data = NULL;
+        int                 slot = -1;
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                slot = __event_getindex (event_pool, ev_data->fd,
+                                         ev_data->idx);
+
+                if (slot == -1) {
+                        gf_log ("epoll", GF_LOG_ERROR,
+                                "index not found for fd(=%d) (idx_hint=%d)",
+                                ev_data->fd, ev_data->idx);
+                } else {
+                        callback = event_pool->reg[slot].handler;
+                        cbk_data = event_pool->reg[slot].data;
+                }
+        }
+        pthread_mutex_unlock (&event_pool->mutex);
+
+        if (!callback)
+                return -1;
+
+        return callback (ev_data->fd, ev_data->idx, cbk_data,
+                         (fired->events & (EPOLLIN|EPOLLPRI)),
+                         (fired->events & (EPOLLOUT)),
+                         (fired->events & (EPOLLERR|EPOLLHUP)));
+}
+
+
+/*
+ * Main loop of the epoll backend: wait for events and dispatch each one
+ * through event_dispatch_epoll_handler().
+ *
+ * Blocks on event_pool->cond while nothing is registered. The result
+ * buffer (event_pool->evcache) is re-allocated with 256 entries of
+ * headroom whenever the number of registered fds exceeds its capacity.
+ *
+ * Does not return in normal operation. Returns -1 when event_pool is
+ * NULL or when the result buffer cannot be allocated; in the latter
+ * case the mutex is released first and evcache / evcache_size are left
+ * consistent (NULL / 0).
+ */
+static int
+event_dispatch_epoll (struct event_pool *event_pool)
+{
+        struct epoll_event *events = NULL;
+        int                 maxevents = 0;
+        int                 size = 0;
+        int                 i = 0;
+        int                 ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        while (1) {
+                pthread_mutex_lock (&event_pool->mutex);
+                {
+                        while (event_pool->used == 0)
+                                pthread_cond_wait (&event_pool->cond,
+                                                   &event_pool->mutex);
+
+                        if (event_pool->used > event_pool->evcache_size) {
+                                GF_FREE (event_pool->evcache);
+
+                                /* never advertise a size without a buffer */
+                                event_pool->evcache = NULL;
+                                event_pool->evcache_size = 0;
+
+                                maxevents = event_pool->used + 256;
+
+                                events = GF_CALLOC (maxevents,
+                                                    sizeof (struct epoll_event),
+                                                    gf_common_mt_epoll_event);
+                                if (events) {
+                                        event_pool->evcache = events;
+                                        event_pool->evcache_size = maxevents;
+                                }
+                        }
+
+                        /* snapshot under the lock for use after unlock */
+                        events = event_pool->evcache;
+                        maxevents = event_pool->evcache_size;
+                }
+                pthread_mutex_unlock (&event_pool->mutex);
+
+                if (!events) {
+                        /* allocation failed; bail out with the lock released */
+                        ret = -1;
+                        break;
+                }
+
+                ret = epoll_wait (event_pool->fd, events, maxevents, -1);
+
+                if (ret == 0)
+                        /* timeout */
+                        continue;
+
+                if (ret == -1 && errno == EINTR)
+                        /* sys call */
+                        continue;
+
+                size = ret;
+
+                for (i = 0; i < size; i++) {
+                        if (!events[i].events)
+                                continue;
+
+                        ret = event_dispatch_epoll_handler (event_pool,
+                                                            events, i);
+                }
+        }
+
+out:
+        return ret;
+}
+
+
+/* Operations table that binds each event_ops slot to the epoll-based
+ * implementation of the same name defined in this file. */
+struct event_ops event_ops_epoll = {
+ .new = event_pool_new_epoll,
+ .event_register = event_register_epoll,
+ .event_select_on = event_select_on_epoll,
+ .event_unregister = event_unregister_epoll,
+ .event_dispatch = event_dispatch_epoll
+};
+
+#endif
diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c
index fe511caeb..82baa521a 100644
--- a/libglusterfs/src/event-history.c
+++ b/libglusterfs/src/event-history.c
@@ -11,7 +11,8 @@
#include "event-history.h"
eh_t *
-eh_new (size_t buffer_size, gf_boolean_t use_buffer_once)
+eh_new (size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_buffer_data) (void *data))
{
eh_t *history = NULL;
buffer_t *buffer = NULL;
@@ -22,7 +23,8 @@ eh_new (size_t buffer_size, gf_boolean_t use_buffer_once)
goto out;
}
- buffer = cb_buffer_new (buffer_size, use_buffer_once);
+ buffer = cb_buffer_new (buffer_size, use_buffer_once,
+ destroy_buffer_data);
if (!buffer) {
gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed");
GF_FREE (history);
diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/event-history.h
index b1750bbae..b64f63b5e 100644
--- a/libglusterfs/src/event-history.h
+++ b/libglusterfs/src/event-history.h
@@ -32,7 +32,8 @@ eh_dump (eh_t *event , void *data,
int (fn) (circular_buffer_t *buffer, void *data));
eh_t *
-eh_new (size_t buffer_size, gf_boolean_t use_buffer_once);
+eh_new (size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_data) (void *data));
int
eh_save_history (eh_t *history, void *string);
diff --git a/libglusterfs/src/event-poll.c b/libglusterfs/src/event-poll.c
new file mode 100644
index 000000000..7f7f560d0
--- /dev/null
+++ b/libglusterfs/src/event-poll.c
@@ -0,0 +1,451 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/poll.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "logging.h"
+#include "event.h"
+#include "mem-pool.h"
+#include "common-utils.h"
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+static int
+event_register_poll (struct event_pool *event_pool, int fd,
+ event_handler_t handler,
+ void *data, int poll_in, int poll_out);
+
+
+/*
+ * Event handler that drains readable data from 'fd' and discards it.
+ * It is registered by event_pool_new_poll() for the read end of the
+ * pool's breaker pipe.
+ *
+ * Nothing is read unless poll_in is set. Reading continues for as long
+ * as read() fills the whole scratch buffer. A read error other than
+ * EAGAIN is logged. idx, data, poll_out and poll_err are unused.
+ *
+ * Returns the result of the last read(), or -1 when poll_in is not set.
+ */
+static int
+__flush_fd (int fd, int idx, void *data,
+            int poll_in, int poll_out, int poll_err)
+{
+        char  scratch[64];
+        int   nread = -1;
+
+        if (!poll_in)
+                return -1;
+
+        for (;;) {
+                nread = read (fd, scratch, sizeof (scratch));
+                if (nread == -1 && errno != EAGAIN) {
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "read on %d returned error (%s)",
+                                fd, strerror (errno));
+                }
+
+                /* a short (or failed) read means there is nothing left */
+                if (nread != (int) sizeof (scratch))
+                        break;
+        }
+
+        return nread;
+}
+
+
+/*
+ * Find the registry slot that currently holds 'fd'.
+ *
+ * 'idx' is only a hint: when it is inside [0, used) and that slot holds
+ * 'fd' it is returned directly, otherwise the used part of the registry
+ * is scanned from the start.
+ *
+ * Returns the slot index, or -1 when event_pool is NULL or no used slot
+ * holds 'fd'. Every call site in this file invokes it with
+ * event_pool->mutex held.
+ */
+static int
+__event_getindex (struct event_pool *event_pool, int fd, int idx)
+{
+        int  found = -1;
+        int  slot = 0;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        /* fast path: the hint is still accurate */
+        if (idx >= 0 && idx < event_pool->used &&
+            event_pool->reg[idx].fd == fd) {
+                found = idx;
+                goto out;
+        }
+
+        /* slow path: linear search over the slots in use */
+        for (slot = 0; slot < event_pool->used; slot++) {
+                if (event_pool->reg[slot].fd == fd) {
+                        found = slot;
+                        break;
+                }
+        }
+
+out:
+        return found;
+}
+
+
+/*
+ * Allocate an event pool for the poll() backend.
+ *
+ * Besides the registry of 'count' slots, a "breaker" pipe is created,
+ * both ends are switched to non-blocking mode, and the read end is
+ * registered with __flush_fd as its handler.
+ *
+ * Returns the new pool, or NULL on any failure; the pipe (if created)
+ * is closed and all memory is released in that case.
+ */
+static struct event_pool *
+event_pool_new_poll (int count)
+{
+        struct event_pool *pool = NULL;
+        int                rc = -1;
+        int                end = 0;
+
+        pool = GF_CALLOC (1, sizeof (*pool), gf_common_mt_event_pool);
+        if (!pool)
+                return NULL;
+
+        pool->count = count;
+        pool->reg = GF_CALLOC (pool->count, sizeof (*pool->reg),
+                               gf_common_mt_reg);
+        if (!pool->reg)
+                goto free_pool;
+
+        pthread_mutex_init (&pool->mutex, NULL);
+
+        rc = pipe (pool->breaker);
+        if (rc == -1) {
+                gf_log ("poll", GF_LOG_ERROR,
+                        "pipe creation failed (%s)", strerror (errno));
+                goto free_reg;
+        }
+
+        /* read end first, then write end */
+        for (end = 0; end < 2; end++) {
+                rc = fcntl (pool->breaker[end], F_SETFL, O_NONBLOCK);
+                if (rc == -1) {
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "could not set pipe to non blocking mode (%s)",
+                                strerror (errno));
+                        goto close_pipe;
+                }
+        }
+
+        rc = event_register_poll (pool, pool->breaker[0],
+                                  __flush_fd, NULL, 1, 0);
+        if (rc == -1) {
+                gf_log ("poll", GF_LOG_ERROR,
+                        "could not register pipe fd with poll event loop");
+                goto close_pipe;
+        }
+
+        return pool;
+
+close_pipe:
+        close (pool->breaker[0]);
+        close (pool->breaker[1]);
+        pool->breaker[0] = pool->breaker[1] = -1;
+free_reg:
+        GF_FREE (pool->reg);
+free_pool:
+        GF_FREE (pool);
+        return NULL;
+}
+
+
+/*
+ * Register 'fd' with the poll-backed pool.
+ *
+ * handler/data are stored in the registry slot and handed back by the
+ * dispatcher. poll_in / poll_out select POLLIN / POLLOUT interest:
+ * 1 enables, 0 disables, -1 leaves the default; any other value is
+ * logged and ignored. POLLPRI is always requested.
+ *
+ * Returns the index of the slot assigned to 'fd', or -1 when event_pool
+ * is NULL or the registry cannot grow. When growing fails the existing
+ * registry and its capacity are left untouched.
+ */
+static int
+event_register_poll (struct event_pool *event_pool, int fd,
+                     event_handler_t handler,
+                     void *data, int poll_in, int poll_out)
+{
+        int    idx = -1;
+        int    new_count = 0;
+        void  *new_reg = NULL;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                if (event_pool->count == event_pool->used)
+                {
+                        new_count = event_pool->count + 256;
+
+                        /* keep the old array reachable if this fails */
+                        new_reg = GF_REALLOC (event_pool->reg,
+                                              new_count *
+                                              sizeof (*event_pool->reg));
+                        if (!new_reg) {
+                                gf_log ("poll", GF_LOG_ERROR,
+                                        "event registry re-allocation failed");
+                                goto unlock;
+                        }
+
+                        event_pool->reg = new_reg;
+                        event_pool->count = new_count;
+                }
+
+                idx = event_pool->used++;
+
+                event_pool->reg[idx].fd = fd;
+                event_pool->reg[idx].events = POLLPRI;
+                event_pool->reg[idx].handler = handler;
+                event_pool->reg[idx].data = data;
+
+                switch (poll_in) {
+                case 1:
+                        event_pool->reg[idx].events |= POLLIN;
+                        break;
+                case 0:
+                        event_pool->reg[idx].events &= ~POLLIN;
+                        break;
+                case -1:
+                        /* do nothing */
+                        break;
+                default:
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "invalid poll_in value %d", poll_in);
+                        break;
+                }
+
+                switch (poll_out) {
+                case 1:
+                        event_pool->reg[idx].events |= POLLOUT;
+                        break;
+                case 0:
+                        event_pool->reg[idx].events &= ~POLLOUT;
+                        break;
+                case -1:
+                        /* do nothing */
+                        break;
+                default:
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "invalid poll_out value %d", poll_out);
+                        break;
+                }
+
+                /* makes the dispatcher rebuild its pollfd array */
+                event_pool->changed = 1;
+
+        }
+unlock:
+        pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+        return idx;
+}
+
+
+/*
+ * Remove 'fd' from the registry. The freed slot is overwritten with the
+ * last used slot and the pool is marked as changed.
+ *
+ * 'idx_hint' is a lookup hint for __event_getindex().
+ *
+ * Returns the index the fd occupied, or -1 when event_pool is NULL or
+ * 'fd' is not registered (errno is set to ENOENT).
+ */
+static int
+event_unregister_poll (struct event_pool *event_pool, int fd, int idx_hint)
+{
+        int  slot = -1;
+        int  tail = 0;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                slot = __event_getindex (event_pool, fd, idx_hint);
+
+                if (slot != -1) {
+                        /* shrink first, then back-fill from the old tail */
+                        event_pool->used--;
+                        tail = event_pool->used;
+
+                        event_pool->reg[slot] = event_pool->reg[tail];
+                        event_pool->changed = 1;
+                } else {
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "index not found for fd=%d (idx_hint=%d)",
+                                fd, idx_hint);
+                        errno = ENOENT;
+                }
+        }
+        pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+        return slot;
+}
+
+
+/*
+ * Change the POLLIN / POLLOUT interest of an already registered 'fd'.
+ *
+ * poll_in / poll_out: 1 enables, 0 disables, -1 leaves the bit as it
+ * is; any other value is logged and ignored, as event_register_poll
+ * does. 'idx_hint' is a lookup hint for __event_getindex().
+ *
+ * The pool is marked as changed whenever at least one of the two
+ * arguments is something other than -1.
+ *
+ * Returns the slot index of 'fd', or -1 when event_pool is NULL or 'fd'
+ * is not registered (errno is set to ENOENT).
+ */
+static int
+event_select_on_poll (struct event_pool *event_pool, int fd, int idx_hint,
+                      int poll_in, int poll_out)
+{
+        int  idx = -1;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                idx = __event_getindex (event_pool, fd, idx_hint);
+
+                if (idx == -1) {
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "index not found for fd=%d (idx_hint=%d)",
+                                fd, idx_hint);
+                        errno = ENOENT;
+                        goto unlock;
+                }
+
+                switch (poll_in) {
+                case 1:
+                        event_pool->reg[idx].events |= POLLIN;
+                        break;
+                case 0:
+                        event_pool->reg[idx].events &= ~POLLIN;
+                        break;
+                case -1:
+                        /* do nothing */
+                        break;
+                default:
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "invalid poll_in value %d", poll_in);
+                        break;
+                }
+
+                switch (poll_out) {
+                case 1:
+                        event_pool->reg[idx].events |= POLLOUT;
+                        break;
+                case 0:
+                        event_pool->reg[idx].events &= ~POLLOUT;
+                        break;
+                case -1:
+                        /* do nothing */
+                        break;
+                default:
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "invalid poll_out value %d", poll_out);
+                        break;
+                }
+
+                /*
+                 * Tested per argument rather than on their sum, so an
+                 * out-of-range value cannot cancel out a real change.
+                 */
+                if (poll_in != -1 || poll_out != -1)
+                        event_pool->changed = 1;
+        }
+unlock:
+        pthread_mutex_unlock (&event_pool->mutex);
+
+out:
+        return idx;
+}
+
+
+/*
+ * Deliver the i-th entry of 'ufds' to its registered handler.
+ *
+ * The handler and its data are looked up under event_pool->mutex, with
+ * 'i' used as the index hint; the handler itself is called after the
+ * mutex has been released.
+ *
+ * Returns the handler's return value, or 0 when the fd is no longer in
+ * the registry or the slot has no handler.
+ */
+static int
+event_dispatch_poll_handler (struct event_pool *event_pool,
+                             struct pollfd *ufds, int i)
+{
+        struct pollfd   *fired = &ufds[i];
+        event_handler_t  callback = NULL;
+        void            *cbk_data = NULL;
+        int              slot = -1;
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                slot = __event_getindex (event_pool, fired->fd, i);
+
+                if (slot == -1) {
+                        gf_log ("poll", GF_LOG_ERROR,
+                                "index not found for fd=%d (idx_hint=%d)",
+                                fired->fd, i);
+                } else {
+                        callback = event_pool->reg[slot].handler;
+                        cbk_data = event_pool->reg[slot].data;
+                }
+        }
+        pthread_mutex_unlock (&event_pool->mutex);
+
+        if (!callback)
+                return 0;
+
+        return callback (fired->fd, slot, cbk_data,
+                         (fired->revents & (POLLIN|POLLPRI)),
+                         (fired->revents & (POLLOUT)),
+                         (fired->revents & (POLLERR|POLLHUP|POLLNVAL)));
+}
+
+
+/*
+ * Rebuild the pollfd array (event_pool->evcache) from the registry.
+ *
+ * Nothing is done when event_pool->changed is 0. Otherwise the cache is
+ * re-allocated if it is too small, and every used registry slot is
+ * copied into it with revents cleared.
+ *
+ * 'ufds' is kept for interface compatibility; the array that is filled
+ * is always the one stored in event_pool->evcache. 'size' is the
+ * caller's current entry count and is returned unchanged when nothing
+ * was rebuilt.
+ *
+ * Returns the number of valid entries in event_pool->evcache. If the
+ * cache cannot be allocated, evcache / evcache_size are reset to
+ * NULL / 0 and 0 is returned, so the caller never indexes a NULL array
+ * and a later call tries the allocation again.
+ */
+static int
+event_dispatch_poll_resize (struct event_pool *event_pool,
+                            struct pollfd *ufds, int size)
+{
+        int  i = 0;
+
+        pthread_mutex_lock (&event_pool->mutex);
+        {
+                if (event_pool->changed == 0) {
+                        goto unlock;
+                }
+
+                if (event_pool->used > event_pool->evcache_size) {
+                        GF_FREE (event_pool->evcache);
+
+                        /* never advertise a size without a buffer */
+                        event_pool->evcache = NULL;
+                        event_pool->evcache_size = 0;
+
+                        ufds = GF_CALLOC (event_pool->used,
+                                          sizeof (struct pollfd),
+                                          gf_common_mt_pollfd);
+                        if (!ufds) {
+                                size = 0;
+                                goto unlock;
+                        }
+
+                        event_pool->evcache = ufds;
+                        event_pool->evcache_size = event_pool->used;
+                } else {
+                        ufds = event_pool->evcache;
+                }
+
+                for (i = 0; i < event_pool->used; i++) {
+                        ufds[i].fd = event_pool->reg[i].fd;
+                        ufds[i].events = event_pool->reg[i].events;
+                        ufds[i].revents = 0;
+                }
+
+                size = i;
+        }
+unlock:
+        pthread_mutex_unlock (&event_pool->mutex);
+
+        return size;
+}
+
+
+/*
+ * Main loop of the poll backend.
+ *
+ * Each pass refreshes the pollfd array through
+ * event_dispatch_poll_resize(), calls poll() with a 1 ms timeout and
+ * then runs event_dispatch_poll_handler() for every entry whose revents
+ * is non-zero. Timeouts and EINTR simply start the next pass.
+ *
+ * The loop has no exit; the only return is -1 when event_pool is NULL.
+ */
+static int
+event_dispatch_poll (struct event_pool *event_pool)
+{
+        struct pollfd *fds = NULL;
+        int            nfds = 0;
+        int            k = 0;
+        int            ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+
+        for (;;) {
+                nfds = event_dispatch_poll_resize (event_pool, fds, nfds);
+                fds = event_pool->evcache;
+
+                ret = poll (fds, nfds, 1);
+
+                /* timeout, or interrupted system call: just go around */
+                if (ret == 0 || (ret == -1 && errno == EINTR))
+                        continue;
+
+                for (k = 0; k < nfds; k++) {
+                        if (fds[k].revents)
+                                event_dispatch_poll_handler (event_pool,
+                                                             fds, k);
+                }
+        }
+
+out:
+        return -1;
+}
+
+
+/* Operations table that binds each event_ops slot to the poll()-based
+ * implementation of the same name defined in this file. */
+struct event_ops event_ops_poll = {
+ .new = event_pool_new_poll,
+ .event_register = event_register_poll,
+ .event_select_on = event_select_on_poll,
+ .event_unregister = event_unregister_poll,
+ .event_dispatch = event_dispatch_poll
+};
diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c
index 8f172fb24..0197e7948 100644
--- a/libglusterfs/src/event.c
+++ b/libglusterfs/src/event.c
@@ -26,851 +26,17 @@
#include "config.h"
#endif
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-
-
-static int
-__flush_fd (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- char buf[64];
- int ret = -1;
-
- if (!poll_in)
- return ret;
-
- do {
- ret = read (fd, buf, 64);
- if (ret == -1 && errno != EAGAIN) {
- gf_log ("poll", GF_LOG_ERROR,
- "read on %d returned error (%s)",
- fd, strerror (errno));
- }
- } while (ret == 64);
-
- return ret;
-}
-
-
-static int
-__event_getindex (struct event_pool *event_pool, int fd, int idx)
-{
- int ret = -1;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- if (idx > -1 && idx < event_pool->used) {
- if (event_pool->reg[idx].fd == fd)
- ret = idx;
- }
-
- for (i=0; ret == -1 && i<event_pool->used; i++) {
- if (event_pool->reg[i].fd == fd) {
- ret = i;
- break;
- }
- }
-
-out:
- return ret;
-}
-
-
-static struct event_pool *
-event_pool_new_poll (int count)
-{
- struct event_pool *event_pool = NULL;
- int ret = -1;
-
- event_pool = GF_CALLOC (1, sizeof (*event_pool),
- gf_common_mt_event_pool);
-
- if (!event_pool)
- return NULL;
-
- event_pool->count = count;
- event_pool->reg = GF_CALLOC (event_pool->count,
- sizeof (*event_pool->reg),
- gf_common_mt_reg);
-
- if (!event_pool->reg) {
- GF_FREE (event_pool);
- return NULL;
- }
-
- pthread_mutex_init (&event_pool->mutex, NULL);
-
- ret = pipe (event_pool->breaker);
-
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "pipe creation failed (%s)", strerror (errno));
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
-
- ret = fcntl (event_pool->breaker[0], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
-
- ret = fcntl (event_pool->breaker[1], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
-
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
-
- ret = event_register_poll (event_pool, event_pool->breaker[0],
- __flush_fd, NULL, 1, 0);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not register pipe fd with poll event loop");
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
-
- return event_pool;
-}
-
-
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
-{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used)
- {
- event_pool->count += 256;
-
- event_pool->reg = GF_REALLOC (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
- if (!event_pool->reg)
- goto unlock;
- }
-
- idx = event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = POLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
-
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return idx;
-}
-
-
-static int
-event_unregister_poll (struct event_pool *event_pool, int fd, int idx_hint)
-{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- event_pool->reg[idx] = event_pool->reg[--event_pool->used];
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return idx;
-}
-
-
-static int
-event_select_on_poll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- if (poll_in + poll_out > -2)
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return idx;
-}
-
-
-static int
-event_dispatch_poll_handler (struct event_pool *event_pool,
- struct pollfd *ufds, int i)
-{
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = 0;
-
- handler = NULL;
- data = NULL;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, ufds[i].fd, i);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- ufds[i].fd, i);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (ufds[i].fd, idx, data,
- (ufds[i].revents & (POLLIN|POLLPRI)),
- (ufds[i].revents & (POLLOUT)),
- (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL)));
-
- return ret;
-}
-
-
-static int
-event_dispatch_poll_resize (struct event_pool *event_pool,
- struct pollfd *ufds, int size)
-{
- int i = 0;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->changed == 0) {
- goto unlock;
- }
-
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- GF_FREE (event_pool->evcache);
-
- event_pool->evcache = ufds = NULL;
-
- event_pool->evcache_size = event_pool->used;
-
- ufds = GF_CALLOC (sizeof (struct pollfd),
- event_pool->evcache_size,
- gf_common_mt_pollfd);
- if (!ufds)
- goto unlock;
- event_pool->evcache = ufds;
- }
-
- for (i = 0; i < event_pool->used; i++) {
- ufds[i].fd = event_pool->reg[i].fd;
- ufds[i].events = event_pool->reg[i].events;
- ufds[i].revents = 0;
- }
-
- size = i;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return size;
-}
-
-
-static int
-event_dispatch_poll (struct event_pool *event_pool)
-{
- struct pollfd *ufds = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- while (1) {
- size = event_dispatch_poll_resize (event_pool, ufds, size);
- ufds = event_pool->evcache;
-
- ret = poll (ufds, size, 1);
-
- if (ret == 0)
- /* timeout */
- continue;
-
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
-
- for (i = 0; i < size; i++) {
- if (!ufds[i].revents)
- continue;
-
- event_dispatch_poll_handler (event_pool, ufds, i);
- }
- }
-
-out:
- return -1;
-}
-
-
-static struct event_ops event_ops_poll = {
- .new = event_pool_new_poll,
- .event_register = event_register_poll,
- .event_select_on = event_select_on_poll,
- .event_unregister = event_unregister_poll,
- .event_dispatch = event_dispatch_poll
-};
-
-
-
-#ifdef HAVE_SYS_EPOLL_H
-#include <sys/epoll.h>
-
-
-static struct event_pool *
-event_pool_new_epoll (int count)
-{
- struct event_pool *event_pool = NULL;
- int epfd = -1;
-
- event_pool = GF_CALLOC (1, sizeof (*event_pool),
- gf_common_mt_event_pool);
-
- if (!event_pool)
- goto out;
-
- event_pool->count = count;
- event_pool->reg = GF_CALLOC (event_pool->count,
- sizeof (*event_pool->reg),
- gf_common_mt_reg);
-
- if (!event_pool->reg) {
- GF_FREE (event_pool);
- event_pool = NULL;
- goto out;
- }
-
- epfd = epoll_create (count);
-
- if (epfd == -1) {
- gf_log ("epoll", GF_LOG_ERROR, "epoll fd creation failed (%s)",
- strerror (errno));
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- event_pool = NULL;
- goto out;
- }
-
- event_pool->fd = epfd;
-
- event_pool->count = count;
-
- pthread_mutex_init (&event_pool->mutex, NULL);
- pthread_cond_init (&event_pool->cond, NULL);
-
-out:
- return event_pool;
-}
-
-
-int
-event_register_epoll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
-{
- int idx = -1;
- int ret = -1;
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used) {
- event_pool->count *= 2;
-
- event_pool->reg = GF_REALLOC (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
-
- if (!event_pool->reg) {
- gf_log ("epoll", GF_LOG_ERROR,
- "event registry re-allocation failed");
- goto unlock;
- }
- }
-
- idx = event_pool->used;
- event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = EPOLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_ADD, fd,
- &epoll_event);
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to add fd(=%d) to epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- pthread_cond_broadcast (&event_pool->cond);
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return ret;
-}
-
-
-static int
-event_unregister_epoll (struct event_pool *event_pool, int fd, int idx_hint)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
- int lastidx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
-
- /* if ret is -1, this array member should never be accessed */
- /* if it is 0, the array member might be used by idx_cache
- * in which case the member should not be accessed till
- * it is reallocated
- */
-
- event_pool->reg[idx].fd = -1;
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to del fd(=%d) from epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- lastidx = event_pool->used - 1;
- if (lastidx == idx) {
- event_pool->used--;
- goto unlock;
- }
-
- epoll_event.events = event_pool->reg[lastidx].events;
- ev_data->fd = event_pool->reg[lastidx].fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, ev_data->fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to modify fd(=%d) index %d to %d (%s)",
- ev_data->fd, event_pool->used, idx,
- strerror (errno));
- goto unlock;
- }
-
- /* just replace the unregistered idx by last one */
- event_pool->reg[idx] = event_pool->reg[lastidx];
- event_pool->used--;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return ret;
-}
-
-
-static int
-event_select_on_epoll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to modify fd(=%d) events to %d",
- fd, epoll_event.events);
- }
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return ret;
-}
-
-
-static int
-event_dispatch_epoll_handler (struct event_pool *event_pool,
- struct epoll_event *events, int i)
-{
- struct event_data *event_data = NULL;
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = -1;
-
-
- event_data = (void *)&events[i].data;
- handler = NULL;
- data = NULL;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, event_data->fd,
- event_data->idx);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd(=%d) (idx_hint=%d)",
- event_data->fd, event_data->idx);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (event_data->fd, event_data->idx, data,
- (events[i].events & (EPOLLIN|EPOLLPRI)),
- (events[i].events & (EPOLLOUT)),
- (events[i].events & (EPOLLERR|EPOLLHUP)));
- return ret;
-}
-
-
-static int
-event_dispatch_epoll (struct event_pool *event_pool)
-{
- struct epoll_event *events = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- while (1) {
- pthread_mutex_lock (&event_pool->mutex);
- {
- while (event_pool->used == 0)
- pthread_cond_wait (&event_pool->cond,
- &event_pool->mutex);
-
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- GF_FREE (event_pool->evcache);
-
- event_pool->evcache = events = NULL;
-
- event_pool->evcache_size =
- event_pool->used + 256;
-
- events = GF_CALLOC (event_pool->evcache_size,
- sizeof (struct epoll_event),
- gf_common_mt_epoll_event);
- if (!events)
- break;
-
- event_pool->evcache = events;
- }
- }
- pthread_mutex_unlock (&event_pool->mutex);
-
- ret = epoll_wait (event_pool->fd, event_pool->evcache,
- event_pool->evcache_size, -1);
-
- if (ret == 0)
- /* timeout */
- continue;
-
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
-
- size = ret;
-
- for (i = 0; i < size; i++) {
- if (!events || !events[i].events)
- continue;
-
- ret = event_dispatch_epoll_handler (event_pool,
- events, i);
- }
- }
-
-out:
- return ret;
-}
-
-
-static struct event_ops event_ops_epoll = {
- .new = event_pool_new_epoll,
- .event_register = event_register_epoll,
- .event_select_on = event_select_on_epoll,
- .event_unregister = event_unregister_epoll,
- .event_dispatch = event_dispatch_epoll
-};
-
-#endif
struct event_pool *
event_pool_new (int count)
{
struct event_pool *event_pool = NULL;
+ extern struct event_ops event_ops_poll;
#ifdef HAVE_SYS_EPOLL_H
+ extern struct event_ops event_ops_epoll;
+
event_pool = event_ops_epoll.new (count);
if (event_pool) {
diff --git a/libglusterfs/src/event.h b/libglusterfs/src/event.h
index f2f8029ae..7ed182492 100644
--- a/libglusterfs/src/event.h
+++ b/libglusterfs/src/event.h
@@ -21,8 +21,8 @@
struct event_pool;
struct event_ops;
struct event_data {
- int fd;
- int idx;
+ int fd;
+ int idx;
} __attribute__ ((__packed__, __may_alias__));
@@ -30,28 +30,27 @@ typedef int (*event_handler_t) (int fd, int idx, void *data,
int poll_in, int poll_out, int poll_err);
struct event_pool {
- struct event_ops *ops;
+ struct event_ops *ops;
- int fd;
- int breaker[2];
+ int fd;
+ int breaker[2];
- int count;
- struct {
- int fd;
- int events;
- void *data;
- event_handler_t handler;
- } *reg;
+ int count;
+ struct {
+ int fd;
+ int events;
+ void *data;
+ event_handler_t handler;
+ } *reg;
- int used;
- int idx_cache;
- int changed;
+ int used;
+ int changed;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
- void *evcache;
- int evcache_size;
+ void *evcache;
+ int evcache_size;
};
struct event_ops {
diff --git a/libglusterfs/src/fd-lk.c b/libglusterfs/src/fd-lk.c
index 305f58826..caf2bb38e 100644
--- a/libglusterfs/src/fd-lk.c
+++ b/libglusterfs/src/fd-lk.c
@@ -369,7 +369,7 @@ _fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx,
if (entry->fl_type == lock->fl_type) {
sum = _fd_lk_add_locks (entry, lock);
- if (sum)
+ if (!sum)
return;
sum->fl_type = entry->fl_type;
sum->user_flock.l_type = entry->fl_type;
@@ -380,8 +380,8 @@ _fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx,
return;
} else {
sum = _fd_lk_add_locks (entry, lock);
- sum->fl_type = entry->fl_type;
- sum->user_flock.l_type = entry->fl_type;
+ sum->fl_type = lock->fl_type;
+ sum->user_flock.l_type = lock->fl_type;
ret = _fd_lk_sub_locks (&v, sum, lock);
if (ret)
return;
@@ -391,6 +391,8 @@ _fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx,
_fd_lk_delete_lock (lock);
_fd_lk_destroy_lock (lock);
+ _fd_lk_destroy_lock (sum);
+
for (i = 0; i < 3; i++) {
if (!v.locks[i])
continue;
@@ -407,7 +409,7 @@ _fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx,
if (lock->fl_type != F_UNLCK) {
_fd_lk_insert_lock (lk_ctx, lock);
} else {
- _fd_lk_destroy_lock_list (lk_ctx);
+ _fd_lk_destroy_lock (lock);
}
}
@@ -468,3 +470,21 @@ fd_lk_insert_and_merge (fd_t *fd, int32_t cmd,
out:
return ret;
}
+
+
+gf_boolean_t
+fd_lk_ctx_empty (fd_lk_ctx_t *lk_ctx)
+{
+ gf_boolean_t verdict = _gf_true;
+
+ if (!lk_ctx)
+ return _gf_true;
+
+ LOCK (&lk_ctx->lock);
+ {
+ verdict = list_empty (&lk_ctx->lk_list);
+ }
+ UNLOCK (&lk_ctx->lock);
+
+ return verdict;
+}
diff --git a/libglusterfs/src/fd-lk.h b/libglusterfs/src/fd-lk.h
index bdea8c2a0..1d2ff794c 100644
--- a/libglusterfs/src/fd-lk.h
+++ b/libglusterfs/src/fd-lk.h
@@ -18,6 +18,7 @@
#include "mem-pool.h"
#include "mem-types.h"
#include "glusterfs.h"
+#include "common-utils.h"
#define get_lk_type(type) \
type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK")
@@ -63,4 +64,7 @@ fd_lk_insert_and_merge (struct _fd *lk_ctx, int32_t cmd,
int
fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx);
+gf_boolean_t
+fd_lk_ctx_empty (fd_lk_ctx_t *lk_ctx);
+
#endif /* _FD_LK_H */
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index 3a7a59278..36cc4d056 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -121,7 +121,7 @@ gf_fd_fdtable_alloc (void)
}
-fdentry_t *
+static fdentry_t *
__gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
{
fdentry_t *fdentries = NULL;
@@ -159,7 +159,7 @@ gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
}
-fdentry_t *
+static fdentry_t *
__gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count)
{
fdentry_t *fdentries = NULL;
@@ -513,6 +513,11 @@ fd_destroy (fd_t *fd)
LOCK_DESTROY (&fd->lock);
GF_FREE (fd->_ctx);
+ LOCK (&fd->inode->lock);
+ {
+ fd->inode->fd_count--;
+ }
+ UNLOCK (&fd->inode->lock);
inode_unref (fd->inode);
fd->inode = (inode_t *)0xaaaaaaaa;
fd_lk_ctx_unref (fd->lk_ctx);
@@ -552,6 +557,7 @@ __fd_bind (fd_t *fd)
{
list_del_init (&fd->inode_list);
list_add (&fd->inode_list, &fd->inode->fd_list);
+ fd->inode->fd_count++;
return fd;
}
@@ -659,6 +665,12 @@ __fd_lookup (inode_t *inode, uint64_t pid)
list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (iter_fd->anonymous)
+ /* If someone was interested in getting an
+ anonymous fd (or was OK getting an anonymous fd),
+ they can as well call fd_anonymous() directly */
+ continue;
+
if (!pid || iter_fd->pid == pid) {
fd = __fd_ref (iter_fd);
break;
@@ -707,24 +719,44 @@ fd_lookup_uint64 (inode_t *inode, uint64_t pid)
return fd;
}
+static fd_t *
+__fd_lookup_anonymous (inode_t *inode)
+{
+ fd_t *iter_fd = NULL;
+ fd_t *fd = NULL;
-fd_t *
+ if (list_empty (&inode->fd_list))
+ return NULL;
+
+ list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
+ if (iter_fd->anonymous) {
+ fd = __fd_ref (iter_fd);
+ break;
+ }
+ }
+
+ return fd;
+}
+
+static fd_t *
__fd_anonymous (inode_t *inode)
{
fd_t *fd = NULL;
- fd = __fd_lookup (inode, (uint64_t)-1);
+ fd = __fd_lookup_anonymous (inode);
/* if (fd); then we already have increased the refcount in
- __fd_lookup(), so no need of one more fd_ref().
+ __fd_lookup_anonymous(), so no need of one more fd_ref().
if (!fd); then both create and bind wont bump up the ref
count, so we have to call fd_ref() after bind. */
if (!fd) {
- fd = __fd_create (inode, (uint64_t)-1);
+ fd = __fd_create (inode, 0);
if (!fd)
return NULL;
+ fd->anonymous = _gf_true;
+
__fd_bind (fd);
__fd_ref (fd);
@@ -748,11 +780,28 @@ fd_anonymous (inode_t *inode)
return fd;
}
+fd_t*
+fd_lookup_anonymous (inode_t *inode)
+{
+ fd_t *fd = NULL;
+
+ if (!inode) {
+ gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
+ return NULL;
+ }
+
+ LOCK (&inode->lock);
+ {
+ fd = __fd_lookup_anonymous (inode);
+ }
+ UNLOCK (&inode->lock);
+ return fd;
+}
gf_boolean_t
fd_is_anonymous (fd_t *fd)
{
- return (fd && fd->pid == -1);
+ return (fd && fd->anonymous);
}
@@ -960,6 +1009,13 @@ fd_dump (fd_t *fd, char *prefix)
gf_proc_dump_write("pid", "%llu", fd->pid);
gf_proc_dump_write("refcount", "%d", fd->refcount);
gf_proc_dump_write("flags", "%d", fd->flags);
+
+ if (fd->inode) {
+ gf_proc_dump_build_key (key, "inode", NULL);
+ gf_proc_dump_add_section(key);
+ inode_dump (fd->inode, key);
+ }
+
}
@@ -989,10 +1045,8 @@ fdtable_dump (fdtable_t *fdtable, char *prefix)
ret = pthread_mutex_trylock (&fdtable->lock);
- if (ret) {
- gf_log ("fd", GF_LOG_WARNING, "Unable to acquire lock");
- return;
- }
+ if (ret)
+ goto out;
memset(key, 0, sizeof(key));
gf_proc_dump_build_key(key, prefix, "refcount");
@@ -1012,6 +1066,12 @@ fdtable_dump (fdtable_t *fdtable, char *prefix)
}
pthread_mutex_unlock(&fdtable->lock);
+
+out:
+ if (ret != 0)
+ gf_proc_dump_write ("Unable to dump the fdtable",
+ "(Lock acquistion failed) %p", fdtable);
+ return;
}
@@ -1057,9 +1117,7 @@ unlock:
}
out:
- if (fd_ctx != NULL) {
- GF_FREE (fd_ctx);
- }
+ GF_FREE (fd_ctx);
return;
}
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
index 42df22b95..c1b9157d8 100644
--- a/libglusterfs/src/fd.h
+++ b/libglusterfs/src/fd.h
@@ -22,6 +22,9 @@
#include "glusterfs.h"
#include "locking.h"
#include "fd-lk.h"
+#include "common-utils.h"
+
+#define GF_ANON_FD_NO -2
struct _inode;
struct _dict;
@@ -38,12 +41,8 @@ struct _fd_ctx {
};
};
-/* If this structure changes, please have mercy on the booster maintainer
- * and update the fd_t struct in booster/src/booster-fd.h.
- * See the comment there to know why.
- */
struct _fd {
- uint64_t pid;
+ uint64_t pid;
int32_t flags;
int32_t refcount;
struct list_head inode_list;
@@ -53,6 +52,7 @@ struct _fd {
struct _fd_ctx *_ctx;
int xl_count; /* Number of xl referred in this fd */
struct fd_lk_ctx *lk_ctx;
+ gf_boolean_t anonymous; /* geo-rep anonymous fd */
};
typedef struct _fd fd_t;
@@ -86,7 +86,7 @@ typedef struct _fdtable fdtable_t;
#include "xlator.h"
-inline void
+void
gf_fd_put (fdtable_t *fdtable, int32_t fd);
@@ -118,10 +118,6 @@ fd_t *
fd_ref (fd_t *fd);
-fd_t *
-__fd_unref (fd_t *fd);
-
-
void
fd_unref (fd_t *fd);
@@ -138,6 +134,9 @@ fd_lookup (struct _inode *inode, pid_t pid);
fd_t *
fd_lookup_uint64 (struct _inode *inode, uint64_t pid);
+fd_t*
+fd_lookup_anonymous (inode_t *inode);
+
fd_t *
fd_anonymous (inode_t *inode);
@@ -153,8 +152,6 @@ fd_list_empty (struct _inode *inode);
fd_t *
fd_bind (fd_t *fd);
-fd_t *
-__fd_bind (fd_t *fd);
int
fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
@@ -167,20 +164,17 @@ fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
int
fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
int
-__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
+__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
int
-__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
+__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
int
-__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
+__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
-fd_t *
-__fd_ref (fd_t *fd);
void
fd_ctx_dump (fd_t *fd, char *prefix);
@@ -188,8 +182,6 @@ fd_ctx_dump (fd_t *fd, char *prefix);
fdentry_t *
gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count);
-fdentry_t *
-__gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count);
void
gf_fdptr_put (fdtable_t *fdtable, fd_t *fd);
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h
index 36a5a629c..588d522db 100644
--- a/libglusterfs/src/gf-dirent.h
+++ b/libglusterfs/src/gf-dirent.h
@@ -45,9 +45,10 @@ struct _gf_dirent_t {
struct iatt d_stat;
dict_t *dict;
inode_t *inode;
- char d_name[0];
+ char d_name[];
};
+#define DT_ISDIR(mode) (mode == DT_DIR)
gf_dirent_t *gf_dirent_for_name (const char *name);
void gf_dirent_free (gf_dirent_t *entries);
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
new file mode 100644
index 000000000..c5bdda925
--- /dev/null
+++ b/libglusterfs/src/gidcache.c
@@ -0,0 +1,192 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gidcache.h"
+#include "mem-pool.h"
+
+/*
+ * We treat this as a very simple set-associative LRU cache, with entries aged
+ * out after a configurable interval. Hardly rocket science, but lots of
+ * details to worry about.
+ */
+#define BUCKET_START(p,n) ((p) + ((n) * AUX_GID_CACHE_ASSOC))
+
+/*
+ * Initialize the cache.
+ */
+int gid_cache_init(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK_INIT(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
+ memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
+
+ return 0;
+}
+
+/*
+ * Reconfigure the cache timeout.
+ */
+int gid_cache_reconf(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ UNLOCK(&cache->gc_lock);
+
+ return 0;
+}
+
+/*
+ * Look up an ID in the cache. If found, return the actual cache entry to avoid
+ * an additional allocation and memory copy. The caller should copy the data and
+ * release (unlock) the cache as soon as possible.
+ */
+const gid_list_t *gid_cache_lookup(gid_cache_t *cache, uint64_t id)
+{
+ int bucket;
+ int i;
+ time_t now;
+ const gid_list_t *agl;
+
+ LOCK(&cache->gc_lock);
+ now = time(NULL);
+ bucket = id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
+ if (!agl->gl_list)
+ continue;
+ if (agl->gl_id != id)
+ continue;
+
+ /*
+ * We don't put new entries in the cache when expiration=0, but
+ * there might be entries still in there if expiration was
+ * changed very recently. Writing the check this way ensures
+ * that they're not used.
+ */
+ if (now < agl->gl_deadline) {
+ return agl;
+ }
+
+ /*
+ * We're not going to find any more UID matches, and reaping
+ * is handled further down to maintain LRU order.
+ */
+ break;
+ }
+ UNLOCK(&cache->gc_lock);
+ return NULL;
+}
+
+/*
+ * Release an entry found via lookup.
+ */
+void gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
+{
+ UNLOCK(&cache->gc_lock);
+}
+
+/*
+ * Add a new list entry to the cache. If an entry for this ID already exists,
+ * update it.
+ */
+int gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
+{
+ gid_list_t *agl;
+ int bucket;
+ int i;
+ time_t now;
+
+ if (!gl || !gl->gl_list)
+ return -1;
+
+ if (!cache->gc_max_age)
+ return 0;
+
+ LOCK(&cache->gc_lock);
+ now = time(NULL);
+
+ /*
+ * Scan for the first free entry or one that matches this id. The id
+ * check is added to address a bug where the cache might contain an
+ * expired entry for this id. Since lookup occurs in LRU order and
+ * does not reclaim entries, it will always return failure on discovery
+ * of an expired entry. This leads to duplicate entries being added,
+ * which still do not satisfy lookups until the expired entry (and
+ * everything before it) is reclaimed.
+ *
+ * We address this through reuse of an entry already allocated to this
+ * id, whether expired or not, since we have obviously already received
+ * more recent data. The entry is repopulated with the new data and a new
+ * deadline and is pushed forward to reside as the last populated entry in
+ * the bucket.
+ */
+ bucket = gl->gl_id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
+ if (agl->gl_id == gl->gl_id)
+ break;
+ if (!agl->gl_list)
+ break;
+ }
+
+ /*
+ * The way we allocate free entries naturally places the newest
+ * ones at the highest indices, so evicting the lowest makes
+ * sense, but that also means we can't just replace it with the
+ * one that caused the eviction. That would cause us to thrash
+ * the first entry while others remain idle. Therefore, we
+ * need to slide the other entries down and add the new one at
+ * the end just as if the *last* slot had been free.
+ *
+ * Deadline expiration is also handled here, since the oldest
+ * expired entry will be in the first position. This does mean
+ * the bucket can stay full of expired entries if we're idle
+ * but, if the small amount of extra memory or scan time before
+ * we decide to evict someone ever become issues, we could
+ * easily add a reaper thread.
+ */
+
+ if (i >= AUX_GID_CACHE_ASSOC) {
+ /* cache full, evict the first (LRU) entry */
+ i = 0;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ GF_FREE(agl->gl_list);
+ } else if (agl->gl_list) {
+ /* evict the old entry we plan to reuse */
+ GF_FREE(agl->gl_list);
+ }
+
+ /*
+ * If we have evicted an entry, slide the subsequent populated entries
+ * back and populate the last entry.
+ */
+ for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
+ if (!agl[1].gl_list)
+ break;
+ agl[0] = agl[1];
+ agl++;
+ }
+
+ agl->gl_id = gl->gl_id;
+ agl->gl_count = gl->gl_count;
+ agl->gl_list = gl->gl_list;
+ agl->gl_deadline = now + cache->gc_max_age;
+
+ UNLOCK(&cache->gc_lock);
+
+ return 1;
+}
diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/gidcache.h
new file mode 100644
index 000000000..9379f8e8b
--- /dev/null
+++ b/libglusterfs/src/gidcache.h
@@ -0,0 +1,53 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GIDCACHE_H__
+#define __GIDCACHE_H__
+
+#include "glusterfs.h"
+#include "locking.h"
+
+/*
+ * TBD: make the cache size tunable
+ *
+ * The current size represents a pretty trivial amount of memory, and should
+ * provide good hit rates even for quite busy systems. If we ever want to
+ * support really large cache sizes, we'll need to do dynamic allocation
+ * instead of just defining an array within a private structure. It doesn't make
+ * a whole lot of sense to change the associativity, because it won't improve
+ * hit rates all that much and will increase the maintenance cost as we have
+ * to scan more entries with every lookup/update.
+ */
+
+#define AUX_GID_CACHE_ASSOC 4
+#define AUX_GID_CACHE_BUCKETS 256
+#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
+
+typedef struct {
+ uint64_t gl_id;
+ int gl_count;
+ gid_t *gl_list;
+ time_t gl_deadline;
+} gid_list_t;
+
+typedef struct {
+ gf_lock_t gc_lock;
+ uint32_t gc_max_age;
+ unsigned int gc_nbuckets;
+ gid_list_t gc_cache[AUX_GID_CACHE_SIZE];
+} gid_cache_t;
+
+int gid_cache_init(gid_cache_t *, uint32_t);
+int gid_cache_reconf(gid_cache_t *, uint32_t);
+const gid_list_t *gid_cache_lookup(gid_cache_t *, uint64_t);
+void gid_cache_release(gid_cache_t *, const gid_list_t *);
+int gid_cache_add(gid_cache_t *, gid_list_t *);
+
+#endif /* __GIDCACHE_H__ */
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 11f62a550..259c5c885 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -19,115 +19,74 @@
#include "globals.h"
#include "xlator.h"
#include "mem-pool.h"
-
-
-/* gf_*_list[] */
-
-char *gf_fop_list[GF_FOP_MAXVALUE];
-char *gf_mgmt_list[GF_MGMT_MAXVALUE];
-
-
-void
-gf_op_list_init()
-{
- gf_fop_list[GF_FOP_NULL] = "NULL";
- gf_fop_list[GF_FOP_STAT] = "STAT";
- gf_fop_list[GF_FOP_READLINK] = "READLINK";
- gf_fop_list[GF_FOP_MKNOD] = "MKNOD";
- gf_fop_list[GF_FOP_MKDIR] = "MKDIR";
- gf_fop_list[GF_FOP_UNLINK] = "UNLINK";
- gf_fop_list[GF_FOP_RMDIR] = "RMDIR";
- gf_fop_list[GF_FOP_SYMLINK] = "SYMLINK";
- gf_fop_list[GF_FOP_RENAME] = "RENAME";
- gf_fop_list[GF_FOP_LINK] = "LINK";
- gf_fop_list[GF_FOP_TRUNCATE] = "TRUNCATE";
- gf_fop_list[GF_FOP_OPEN] = "OPEN";
- gf_fop_list[GF_FOP_READ] = "READ";
- gf_fop_list[GF_FOP_WRITE] = "WRITE";
- gf_fop_list[GF_FOP_STATFS] = "STATFS";
- gf_fop_list[GF_FOP_FLUSH] = "FLUSH";
- gf_fop_list[GF_FOP_FSYNC] = "FSYNC";
- gf_fop_list[GF_FOP_SETXATTR] = "SETXATTR";
- gf_fop_list[GF_FOP_GETXATTR] = "GETXATTR";
- gf_fop_list[GF_FOP_REMOVEXATTR] = "REMOVEXATTR";
- gf_fop_list[GF_FOP_OPENDIR] = "OPENDIR";
- gf_fop_list[GF_FOP_FSYNCDIR] = "FSYNCDIR";
- gf_fop_list[GF_FOP_ACCESS] = "ACCESS";
- gf_fop_list[GF_FOP_CREATE] = "CREATE";
- gf_fop_list[GF_FOP_FTRUNCATE] = "FTRUNCATE";
- gf_fop_list[GF_FOP_FSTAT] = "FSTAT";
- gf_fop_list[GF_FOP_LK] = "LK";
- gf_fop_list[GF_FOP_LOOKUP] = "LOOKUP";
- gf_fop_list[GF_FOP_READDIR] = "READDIR";
- gf_fop_list[GF_FOP_INODELK] = "INODELK";
- gf_fop_list[GF_FOP_FINODELK] = "FINODELK";
- gf_fop_list[GF_FOP_ENTRYLK] = "ENTRYLK";
- gf_fop_list[GF_FOP_FENTRYLK] = "FENTRYLK";
- gf_fop_list[GF_FOP_XATTROP] = "XATTROP";
- gf_fop_list[GF_FOP_FXATTROP] = "FXATTROP";
- gf_fop_list[GF_FOP_FSETXATTR] = "FSETXATTR";
- gf_fop_list[GF_FOP_FGETXATTR] = "FGETXATTR";
- gf_fop_list[GF_FOP_RCHECKSUM] = "RCHECKSUM";
- gf_fop_list[GF_FOP_SETATTR] = "SETATTR";
- gf_fop_list[GF_FOP_FSETATTR] = "FSETATTR";
- gf_fop_list[GF_FOP_READDIRP] = "READDIRP";
- gf_fop_list[GF_FOP_GETSPEC] = "GETSPEC";
- gf_fop_list[GF_FOP_FORGET] = "FORGET";
- gf_fop_list[GF_FOP_RELEASE] = "RELEASE";
- gf_fop_list[GF_FOP_RELEASEDIR] = "RELEASEDIR";
-
- gf_fop_list[GF_MGMT_NULL] = "NULL";
- return;
-}
-
-
-/* CTX */
-static glusterfs_ctx_t *glusterfs_ctx;
-
-
-int
-glusterfs_ctx_init ()
-{
- int ret = 0;
-
- if (glusterfs_ctx) {
- gf_log_callingfn ("", GF_LOG_WARNING, "init called again");
- goto out;
- }
-
- glusterfs_ctx = CALLOC (1, sizeof (*glusterfs_ctx));
- if (!glusterfs_ctx) {
- ret = -1;
- goto out;
- }
-
- INIT_LIST_HEAD (&glusterfs_ctx->graphs);
- INIT_LIST_HEAD (&glusterfs_ctx->mempool_list);
- ret = pthread_mutex_init (&glusterfs_ctx->lock, NULL);
-
-out:
- return ret;
-}
-
-
-glusterfs_ctx_t *
-glusterfs_ctx_get ()
-{
- return glusterfs_ctx;
-
-}
-
-
+#include "syncop.h"
+
+const char *gf_fop_list[GF_FOP_MAXVALUE] = {
+ [GF_FOP_NULL] = "NULL",
+ [GF_FOP_STAT] = "STAT",
+ [GF_FOP_READLINK] = "READLINK",
+ [GF_FOP_MKNOD] = "MKNOD",
+ [GF_FOP_MKDIR] = "MKDIR",
+ [GF_FOP_UNLINK] = "UNLINK",
+ [GF_FOP_RMDIR] = "RMDIR",
+ [GF_FOP_SYMLINK] = "SYMLINK",
+ [GF_FOP_RENAME] = "RENAME",
+ [GF_FOP_LINK] = "LINK",
+ [GF_FOP_TRUNCATE] = "TRUNCATE",
+ [GF_FOP_OPEN] = "OPEN",
+ [GF_FOP_READ] = "READ",
+ [GF_FOP_WRITE] = "WRITE",
+ [GF_FOP_STATFS] = "STATFS",
+ [GF_FOP_FLUSH] = "FLUSH",
+ [GF_FOP_FSYNC] = "FSYNC",
+ [GF_FOP_SETXATTR] = "SETXATTR",
+ [GF_FOP_GETXATTR] = "GETXATTR",
+ [GF_FOP_REMOVEXATTR] = "REMOVEXATTR",
+ [GF_FOP_OPENDIR] = "OPENDIR",
+ [GF_FOP_FSYNCDIR] = "FSYNCDIR",
+ [GF_FOP_ACCESS] = "ACCESS",
+ [GF_FOP_CREATE] = "CREATE",
+ [GF_FOP_FTRUNCATE] = "FTRUNCATE",
+ [GF_FOP_FSTAT] = "FSTAT",
+ [GF_FOP_LK] = "LK",
+ [GF_FOP_LOOKUP] = "LOOKUP",
+ [GF_FOP_READDIR] = "READDIR",
+ [GF_FOP_INODELK] = "INODELK",
+ [GF_FOP_FINODELK] = "FINODELK",
+ [GF_FOP_ENTRYLK] = "ENTRYLK",
+ [GF_FOP_FENTRYLK] = "FENTRYLK",
+ [GF_FOP_XATTROP] = "XATTROP",
+ [GF_FOP_FXATTROP] = "FXATTROP",
+ [GF_FOP_FSETXATTR] = "FSETXATTR",
+ [GF_FOP_FGETXATTR] = "FGETXATTR",
+ [GF_FOP_RCHECKSUM] = "RCHECKSUM",
+ [GF_FOP_SETATTR] = "SETATTR",
+ [GF_FOP_FSETATTR] = "FSETATTR",
+ [GF_FOP_READDIRP] = "READDIRP",
+ [GF_FOP_GETSPEC] = "GETSPEC",
+ [GF_FOP_FORGET] = "FORGET",
+ [GF_FOP_RELEASE] = "RELEASE",
+ [GF_FOP_RELEASEDIR] = "RELEASEDIR",
+ [GF_FOP_FREMOVEXATTR]= "FREMOVEXATTR",
+ [GF_FOP_FALLOCATE] = "FALLOCATE",
+ [GF_FOP_DISCARD] = "DISCARD",
+ [GF_FOP_ZEROFILL] = "ZEROFILL",
+};
/* THIS */
xlator_t global_xlator;
static pthread_key_t this_xlator_key;
+static pthread_key_t synctask_key;
+static pthread_key_t uuid_buf_key;
+static char global_uuid_buf[GF_UUID_BUF_SIZE];
+static pthread_key_t lkowner_buf_key;
+static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE];
+
void
glusterfs_this_destroy (void *ptr)
{
- if (ptr)
- FREE (ptr);
+ FREE (ptr);
}
@@ -144,7 +103,6 @@ glusterfs_this_init ()
global_xlator.name = "glusterfs";
global_xlator.type = "global";
- global_xlator.ctx = glusterfs_ctx;
INIT_LIST_HEAD (&global_xlator.volume_options);
@@ -167,8 +125,6 @@ __glusterfs_this_location ()
ret = pthread_setspecific (this_xlator_key, this_location);
if (ret != 0) {
- gf_log ("", GF_LOG_WARNING, "pthread setspecific failed");
-
FREE (this_location);
this_location = NULL;
goto out;
@@ -210,10 +166,55 @@ glusterfs_this_set (xlator_t *this)
return 0;
}
-/* SYNCTASK */
+/* SYNCOPCTX */
+static pthread_key_t syncopctx_key;
-static pthread_key_t synctask_key;
+static void
+syncopctx_key_destroy (void *ptr)
+{
+ struct syncopctx *opctx = ptr;
+
+ if (opctx) {
+ if (opctx->groups)
+ GF_FREE (opctx->groups);
+ GF_FREE (opctx);
+ }
+
+ return;
+}
+
+void *
+syncopctx_getctx ()
+{
+ void *opctx = NULL;
+
+ opctx = pthread_getspecific (syncopctx_key);
+
+ return opctx;
+}
+
+int
+syncopctx_setctx (void *ctx)
+{
+ int ret = 0;
+
+ ret = pthread_setspecific (syncopctx_key, ctx);
+
+ return ret;
+}
+
+static int
+syncopctx_init (void)
+{
+ int ret;
+
+ ret = pthread_key_create (&syncopctx_key, syncopctx_key_destroy);
+
+ return ret;
+}
+
+/* SYNCTASK */
int
synctask_init ()
@@ -225,7 +226,6 @@ synctask_init ()
return ret;
}
-
void *
synctask_get ()
{
@@ -249,13 +249,10 @@ synctask_set (void *synctask)
//UUID_BUFFER
-static pthread_key_t uuid_buf_key;
-static char global_uuid_buf[GF_UUID_BUF_SIZE];
void
glusterfs_uuid_buf_destroy (void *ptr)
{
- if (ptr)
- FREE (ptr);
+ FREE (ptr);
}
int
@@ -278,7 +275,7 @@ glusterfs_uuid_buf_get ()
if(!buf) {
buf = MALLOC (GF_UUID_BUF_SIZE);
ret = pthread_setspecific (uuid_buf_key, (void *) buf);
- if(ret)
+ if (ret)
buf = global_uuid_buf;
}
return buf;
@@ -286,13 +283,10 @@ glusterfs_uuid_buf_get ()
/* LKOWNER_BUFFER */
-static pthread_key_t lkowner_buf_key;
-static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE];
void
glusterfs_lkowner_buf_destroy (void *ptr)
{
- if (ptr)
- FREE (ptr);
+ FREE (ptr);
}
int
@@ -315,27 +309,18 @@ glusterfs_lkowner_buf_get ()
if(!buf) {
buf = MALLOC (GF_LKOWNER_BUF_SIZE);
ret = pthread_setspecific (lkowner_buf_key, (void *) buf);
- if(ret)
+ if (ret)
buf = global_lkowner_buf;
}
return buf;
}
int
-glusterfs_globals_init ()
+glusterfs_globals_init (glusterfs_ctx_t *ctx)
{
int ret = 0;
- gf_op_list_init ();
-
- gf_log_globals_init ();
-
- ret = glusterfs_ctx_init ();
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs context init failed");
- goto out;
- }
+ gf_log_globals_init (ctx);
ret = glusterfs_this_init ();
if (ret) {
@@ -358,42 +343,19 @@ glusterfs_globals_init ()
goto out;
}
- gf_mem_acct_enable_set ();
-
ret = synctask_init ();
if (ret) {
gf_log ("", GF_LOG_CRITICAL,
"ERROR: glusterfs synctask init failed");
goto out;
}
+
+ ret = syncopctx_init ();
+ if (ret) {
+ gf_log ("", GF_LOG_CRITICAL,
+ "ERROR: glusterfs syncopctx init failed");
+ goto out;
+ }
out:
return ret;
}
-
-
-char eventstring[GF_EVENT_MAXVAL+1][64] = {
- "Invalid event",
- "Parent Up",
- "Poll In",
- "Poll Out",
- "Poll Err",
- "Child Up",
- "Child Down",
- "Child Connecting",
- "Child Modified",
- "Transport Cleanup",
- "Transport Connected",
- "Volfile Modified",
- "New Volfile",
- "Translator Info",
- "Xlator Op",
- "Authentication Failed",
- "Invalid event",
-};
-
-/* Copy the string ptr contents if needed for yourself */
-char *
-glusterfs_strevent (glusterfs_event_t ev)
-{
- return eventstring[ev];
-}
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index e797db184..3085db21c 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -13,33 +13,50 @@
#define GF_DEFAULT_BASE_PORT 24007
-#include "glusterfs.h"
-
-/* CTX */
-#define CTX (glusterfs_ctx_get())
-
-glusterfs_ctx_t *glusterfs_ctx_get ();
+#define GD_OP_VERSION_KEY "operating-version"
+#define GD_MIN_OP_VERSION_KEY "minimum-operating-version"
+#define GD_MAX_OP_VERSION_KEY "maximum-operating-version"
+
+/* Gluster versions - OP-VERSION mapping
+ *
+ * 3.3.0 - 1
+ * 3.4.0 - 2
+ * 3.next (3.5?) - 3
+ *
+ * TODO: Change above comment once gluster version is finalised
+ * TODO: Finalize the op-version ranges
+ */
+#define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly
+ should not change */
+#define GD_OP_VERSION_MAX 3 /* MAX VERSION is the maximum count in VME table,
+ should keep changing with introduction of newer
+ versions */
#include "xlator.h"
/* THIS */
#define THIS (*__glusterfs_this_location())
-#define GF_UUID_BUF_SIZE 50
-
xlator_t **__glusterfs_this_location ();
xlator_t *glusterfs_this_get ();
int glusterfs_this_set (xlator_t *);
+/* syncopctx */
+void *syncopctx_getctx ();
+int syncopctx_setctx (void *ctx);
+
/* task */
void *synctask_get ();
int synctask_set (void *);
/* uuid_buf */
char *glusterfs_uuid_buf_get();
+/* lkowner_buf */
char *glusterfs_lkowner_buf_get();
/* init */
-int glusterfs_globals_init (void);
+int glusterfs_globals_init (glusterfs_ctx_t *ctx);
+
+extern const char *gf_fop_list[];
#endif /* !_GLOBALS_H */
diff --git a/libglusterfs/src/glusterfs-acl.h b/libglusterfs/src/glusterfs-acl.h
new file mode 100644
index 000000000..b7de1cdb4
--- /dev/null
+++ b/libglusterfs/src/glusterfs-acl.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_ACL_H
+#define _GLUSTERFS_ACL_H
+
+#include <stdint.h>
+#include <sys/types.h> /* For uid_t */
+
+#include "locking.h" /* For gf_lock_t in struct posix_acl_conf */
+
+#define ACL_PROGRAM 100227
+#define ACLV3_VERSION 3
+
+#define POSIX_ACL_MINIMAL_ACE_COUNT 3
+
+#define POSIX_ACL_READ (0x04)
+#define POSIX_ACL_WRITE (0x02)
+#define POSIX_ACL_EXECUTE (0x01)
+
+#define POSIX_ACL_UNDEFINED_TAG (0x00)
+#define POSIX_ACL_USER_OBJ (0x01)
+#define POSIX_ACL_USER (0x02)
+#define POSIX_ACL_GROUP_OBJ (0x04)
+#define POSIX_ACL_GROUP (0x08)
+#define POSIX_ACL_MASK (0x10)
+#define POSIX_ACL_OTHER (0x20)
+
+#define POSIX_ACL_UNDEFINED_ID (-1)
+
+#define POSIX_ACL_VERSION (0x02)
+
+#define POSIX_ACL_ACCESS_XATTR "system.posix_acl_access"
+#define POSIX_ACL_DEFAULT_XATTR "system.posix_acl_default"
+
+/* One ACL entry: 16-bit tag, 16-bit permission bits, 32-bit id.
+ * NOTE(review): presumably tag/perm carry the POSIX_ACL_* tag and
+ * permission values defined above - confirm against the users. */
+struct posix_acl_xattr_entry {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+/* A version word followed by a flexible array of entries.
+ * NOTE(review): version is presumably POSIX_ACL_VERSION - confirm. */
+struct posix_acl_xattr_header {
+ uint32_t version;
+ struct posix_acl_xattr_entry entries[];
+};
+
+/* Same field layout as struct posix_acl_xattr_entry; element type of the
+ * entries[] array in struct posix_acl. */
+struct posix_ace {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+
+/* ACL with a trailing flexible array of posix_ace entries.
+ * NOTE(review): presumably count is the number of entries and refcnt a
+ * reference count - confirm how both are maintained. */
+struct posix_acl {
+ int refcnt;
+ int count;
+ struct posix_ace entries[];
+};
+
+/* uid, gid and mode bits kept next to pointers to an access ACL and a
+ * default ACL. NOTE(review): what object this context is attached to is
+ * not visible here - confirm. */
+struct posix_acl_ctx {
+ uid_t uid;
+ gid_t gid;
+ mode_t perm;
+ struct posix_acl *acl_access;
+ struct posix_acl *acl_default;
+};
+
+/* acl_lock is the gf_lock_t for which locking.h is included above.
+ * NOTE(review): presumably acl_lock guards the ACL state and super_uid
+ * names the privileged user - confirm. */
+struct posix_acl_conf {
+ gf_lock_t acl_lock;
+ uid_t super_uid;
+ struct posix_acl *minimal_acl;
+};
+
+#endif /* _GLUSTERFS_ACL_H */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 0917ac1b5..2f1e12ee7 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -32,16 +32,23 @@
#include <arpa/inet.h>
#include <sys/poll.h>
#include <pthread.h>
+#include <limits.h> /* For PATH_MAX */
#include "list.h"
#include "logging.h"
+#include "lkowner.h"
#define GF_YES 1
#define GF_NO 0
#ifndef O_LARGEFILE
/* savannah bug #20053, patch for compiling on darwin */
-#define O_LARGEFILE 0
+#define O_LARGEFILE 0100000 /* from bits/fcntl.h */
+#endif
+
+#ifndef O_FMODE_EXEC
+/* redhat bug 843080, added from linux/fs.h */
+#define O_FMODE_EXEC 040 //0x20
#endif
#ifndef O_DIRECT
@@ -63,41 +70,61 @@
#define FNM_EXTMATCH 0
#endif
+#define GLUSTERD_MAX_SNAP_NAME 256
#define ZR_MOUNTPOINT_OPT "mountpoint"
#define ZR_ATTR_TIMEOUT_OPT "attribute-timeout"
#define ZR_ENTRY_TIMEOUT_OPT "entry-timeout"
+#define ZR_NEGATIVE_TIMEOUT_OPT "negative-timeout"
#define ZR_DIRECT_IO_OPT "direct-io-mode"
#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
#define ZR_DUMP_FUSE "dump-fuse"
+#define ZR_FUSE_MOUNTOPTS "fuse-mountopts"
#define GF_XATTR_CLRLK_CMD "glusterfs.clrlk"
#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
+#define GF_XATTR_LOCKINFO_KEY "trusted.glusterfs.lockinfo"
+#define GF_XATTR_GET_REAL_FILENAME_KEY "user.glusterfs.get_real_filename:"
+
+#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
+
+#define BD_XATTR_KEY "user.glusterfs"
#define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \
strlen (GF_XATTR_PATHINFO_KEY)) == 0)
#define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \
strlen (GF_XATTR_NODE_UUID_KEY)) == 0)
+#define XATTR_IS_LOCKINFO(x) (strncmp (x, GF_XATTR_LOCKINFO_KEY, \
+ strlen (GF_XATTR_LOCKINFO_KEY)) == 0)
+
+#define XATTR_IS_BD(x) (strncmp (x, BD_XATTR_KEY, strlen (BD_XATTR_KEY)) == 0)
#define GF_XATTR_LINKINFO_KEY "trusted.distribute.linkinfo"
-#define GFID_XATTR_KEY "trusted.gfid"
+#define GFID_XATTR_KEY "trusted.gfid"
+#define VIRTUAL_GFID_XATTR_KEY_STR "glusterfs.gfid.string"
+#define VIRTUAL_GFID_XATTR_KEY "glusterfs.gfid"
+#define UUID_CANONICAL_FORM_LEN 36
#define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop"
#define ZR_FILE_CONTENT_STR "glusterfs.file."
#define ZR_FILE_CONTENT_STRLEN 15
+#define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
+#define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count"
#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
+#define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime"
/* Index xlator related */
#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
+#define GF_BASE_INDICES_HOLDER_GFID "glusterfs.base_indicies_holder_gfid"
#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
/* replace-brick and pump related internal xattrs */
@@ -107,9 +134,6 @@
#define RB_PUMP_CMD_ABORT "glusterfs.pump.abort"
#define RB_PUMP_CMD_STATUS "glusterfs.pump.status"
-#define POSIX_ACL_DEFAULT_XATTR "system.posix_acl_default"
-#define POSIX_ACL_ACCESS_XATTR "system.posix_acl_access"
-
#define GLUSTERFS_RDMA_INLINE_THRESHOLD (2048)
#define GLUSTERFS_RDMA_MAX_HEADER_SIZE (228) /* (sizeof (rdma_header_t) \
+ RDMA_MAX_SEGMENTS \
@@ -119,13 +143,29 @@
#define GLUSTERFS_RPC_REPLY_SIZE 24
#define ZR_FILE_CONTENT_REQUEST(key) (!strncmp(key, ZR_FILE_CONTENT_STR, \
- ZR_FILE_CONTENT_STRLEN))
+ ZR_FILE_CONTENT_STRLEN))
+
+#define DEFAULT_VAR_RUN_DIRECTORY DATADIR "/run/gluster"
+#define GF_REPLICATE_TRASH_DIR ".landfill"
/* GlusterFS's maximum supported Auxilary GIDs */
/* TODO: Keeping it to 200, so that we can fit in 2KB buffer for auth data
* in RPC server code, if there is ever need for having more aux-gids, then
* we have to add aux-gid in payload of actors */
-#define GF_MAX_AUX_GROUPS 200
+#define GF_MAX_AUX_GROUPS 65536
+
+#define GF_UUID_BUF_SIZE 50
+
+#define GF_REBALANCE_TID_KEY "rebalance-id"
+#define GF_REMOVE_BRICK_TID_KEY "remove-brick-id"
+#define GF_REPLACE_BRICK_TID_KEY "replace-brick-id"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+/* Adding this here instead of any glusterd*.h files as it is also required by
+ * cli
+ */
+#define DEFAULT_GLUSTERD_SOCKFILE DATADIR "/run/glusterd.socket"
/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
typedef enum {
@@ -160,8 +200,8 @@ typedef enum {
GF_FOP_READDIR,
GF_FOP_INODELK,
GF_FOP_FINODELK,
- GF_FOP_ENTRYLK,
- GF_FOP_FENTRYLK,
+ GF_FOP_ENTRYLK,
+ GF_FOP_FENTRYLK,
GF_FOP_XATTROP,
GF_FOP_FXATTROP,
GF_FOP_FGETXATTR,
@@ -175,6 +215,9 @@ typedef enum {
GF_FOP_RELEASEDIR,
GF_FOP_GETSPEC,
GF_FOP_FREMOVEXATTR,
+ GF_FOP_FALLOCATE,
+ GF_FOP_DISCARD,
+ GF_FOP_ZEROFILL,
GF_FOP_MAXVALUE,
} glusterfs_fop_t;
@@ -224,21 +267,23 @@ typedef enum {
typedef enum {
- ENTRYLK_LOCK,
- ENTRYLK_UNLOCK,
- ENTRYLK_LOCK_NB
+ ENTRYLK_LOCK,
+ ENTRYLK_UNLOCK,
+ ENTRYLK_LOCK_NB
} entrylk_cmd;
typedef enum {
- ENTRYLK_RDLCK,
- ENTRYLK_WRLCK
+ ENTRYLK_RDLCK,
+ ENTRYLK_WRLCK
} entrylk_type;
typedef enum {
- GF_XATTROP_ADD_ARRAY,
- GF_XATTROP_ADD_ARRAY64
+ GF_XATTROP_ADD_ARRAY,
+ GF_XATTROP_ADD_ARRAY64,
+ GF_XATTROP_OR_ARRAY,
+ GF_XATTROP_AND_ARRAY
} gf_xattrop_flags_t;
@@ -247,68 +292,79 @@ typedef enum {
#define GF_SET_DIR_ONLY 0x4
#define GF_SET_EPOCH_TIME 0x8 /* used by afr dir lookup selfheal */
-/* Directory into which replicate self-heal will move deleted files and
- directories into. The storage/posix janitor thread will periodically
- clean up this directory */
-
-#define GF_REPLICATE_TRASH_DIR ".landfill"
-
/* key value which quick read uses to get small files in lookup cbk */
#define GF_CONTENT_KEY "glusterfs.content"
struct _xlator_cmdline_option {
- struct list_head cmd_args;
- char *volume;
- char *key;
- char *value;
+ struct list_head cmd_args;
+ char *volume;
+ char *key;
+ char *value;
};
typedef struct _xlator_cmdline_option xlator_cmdline_option_t;
+/* List node holding one volfile server string.
+ * NOTE(review): presumably linked into the volfile_servers list of
+ * struct _cmd_args and referenced by its curr_server member - confirm. */
+struct _server_cmdline {
+ struct list_head list;
+ char *volfile_server;
+};
+typedef struct _server_cmdline server_cmdline_t;
#define GF_OPTION_ENABLE _gf_true
#define GF_OPTION_DISABLE _gf_false
#define GF_OPTION_DEFERRED 2
struct _cmd_args {
- /* basic options */
- char *volfile_server;
- char *volfile;
- char *log_server;
- gf_loglevel_t log_level;
- char *log_file;
+ /* basic options */
+ char *volfile_server;
+ server_cmdline_t *curr_server;
+ /* List of backup volfile servers, including original */
+ struct list_head volfile_servers;
+ char *volfile;
+ char *log_server;
+ gf_loglevel_t log_level;
+ char *log_file;
int32_t max_connect_attempts;
- /* advanced options */
- uint32_t volfile_server_port;
- char *volfile_server_transport;
+ /* advanced options */
+ uint32_t volfile_server_port;
+ char *volfile_server_transport;
uint32_t log_server_port;
- char *pid_file;
+ char *pid_file;
char *sock_file;
- int no_daemon_mode;
- char *run_id;
- int debug_mode;
+ int no_daemon_mode;
+ char *run_id;
+ int debug_mode;
int read_only;
int acl;
+ int selinux;
+ int enable_ino32;
int worm;
int mac_compat;
- struct list_head xlator_options; /* list of xlator_option_t */
-
- /* fuse options */
- int fuse_direct_io_mode;
+ int fopen_keep_cache;
+ int gid_timeout;
+ int aux_gfid_mount;
+ struct list_head xlator_options; /* list of xlator_option_t */
+
+ /* fuse options */
+ int fuse_direct_io_mode;
+ char *use_readdirp;
int volfile_check;
- double fuse_entry_timeout;
- double fuse_attribute_timeout;
- char *volume_name;
- int fuse_nodev;
- int fuse_nosuid;
- char *dump_fuse;
+ double fuse_entry_timeout;
+ double fuse_negative_timeout;
+ double fuse_attribute_timeout;
+ char *volume_name;
+ int fuse_nodev;
+ int fuse_nosuid;
+ char *dump_fuse;
pid_t client_pid;
int client_pid_set;
unsigned uid_map_root;
+ int background_qlen;
+ int congestion_threshold;
+ char *fuse_mountopts;
-
- /* key args */
- char *mount_point;
- char *volfile_id;
+ /* key args */
+ char *mount_point;
+ char *volfile_id;
/* required for portmap */
int brick_port;
@@ -333,17 +389,19 @@ struct _glusterfs_graph {
typedef struct _glusterfs_graph glusterfs_graph_t;
+typedef int32_t (*glusterfsd_mgmt_event_notify_fn_t) (int32_t event, void *data,
+ ...);
struct _glusterfs_ctx {
- cmd_args_t cmd_args;
- char *process_uuid;
- FILE *pidfp;
- char fin;
- void *timer;
- void *ib;
- void *pool;
- void *event_pool;
+ cmd_args_t cmd_args;
+ char *process_uuid;
+ FILE *pidfp;
+ char fin;
+ void *timer;
+ void *ib;
+ struct call_pool *pool;
+ void *event_pool;
void *iobuf_pool;
- pthread_mutex_t lock;
+ pthread_mutex_t lock;
size_t page_size;
struct list_head graphs; /* double linked list of graphs - one per volfile parse */
glusterfs_graph_t *active; /* the latest graph in use */
@@ -359,26 +417,32 @@ struct _glusterfs_ctx {
got changed */
pid_t mnt_pid; /* pid of the mount agent */
int process_mode; /*mode in which process is runninng*/
- struct syncenv *env; /* The env pointer to the synctasks */
+ struct syncenv *env; /* The env pointer to the synctasks */
struct list_head mempool_list; /* used to keep a global list of
mempools, used to log details of
mempool in statedump */
- char *statedump_path;
+ char *statedump_path;
struct mem_pool *dict_pool;
struct mem_pool *dict_pair_pool;
struct mem_pool *dict_data_pool;
- int mem_accounting; /* if value is other than 0, it
- will be set */
+ glusterfsd_mgmt_event_notify_fn_t notify; /* Used for xlators to make
+ call to fsd-mgmt */
+ gf_log_handle_t log; /* all logging related variables */
+
+ int mem_acct_enable;
+
+ int daemon_pipe[2];
+
+ struct client_disconnect *client_disconnect;
+ struct clienttable *clienttable;
};
typedef struct _glusterfs_ctx glusterfs_ctx_t;
+glusterfs_ctx_t *glusterfs_ctx_new (void);
-/* If you edit this structure then, make a corresponding change in
- * globals.c in the eventstring.
- */
typedef enum {
GF_EVENT_PARENT_UP = 1,
GF_EVENT_POLLIN,
@@ -397,12 +461,10 @@ typedef enum {
GF_EVENT_AUTH_FAILED,
GF_EVENT_VOLUME_DEFRAG,
GF_EVENT_PARENT_DOWN,
+ GF_EVENT_VOLUME_BARRIER_OP,
GF_EVENT_MAXVAL,
} glusterfs_event_t;
-/* gf_lkowner_t is defined in lkowner.h */
-#include "lkowner.h"
-
struct gf_flock {
short l_type;
short l_whence;
@@ -412,8 +474,6 @@ struct gf_flock {
gf_lkowner_t l_owner;
};
-extern char *glusterfs_strevent (glusterfs_event_t ev);
-
#define GF_MUST_CHECK __attribute__((warn_unused_result))
/*
* Some macros (e.g. ALLOC_OR_GOTO) set variables in function scope, but the
diff --git a/libglusterfs/src/graph-print.c b/libglusterfs/src/graph-print.c
index 667129864..d860d63b3 100644
--- a/libglusterfs/src/graph-print.c
+++ b/libglusterfs/src/graph-print.c
@@ -24,6 +24,7 @@
struct gf_printer {
ssize_t (*write) (struct gf_printer *gp, char *buf, size_t len);
void *priv;
+ int len;
};
static ssize_t
@@ -80,19 +81,31 @@ gpprintf (struct gf_printer *gp, const char *format, ...)
return ret;
}
-static int
-glusterfs_graph_print (struct gf_printer *gp, glusterfs_graph_t *graph)
-{
#define GPPRINTF(gp, fmt, ...) do { \
ret = gpprintf (gp, fmt, ## __VA_ARGS__); \
if (ret == -1) \
goto out; \
else \
- len += ret; \
+ gp->len += ret; \
} while (0)
+/* dict_foreach () callback used by glusterfs_graph_print (): writes one
+ * "option <key> <value>" line through the printer passed in tmp and adds
+ * the bytes written to the printer's running len.
+ * Returns 0 on success, -1 when gpprintf () reports failure. */
+static int
+_print_volume_options (dict_t *d, char *k, data_t *v,
+                       void *tmp)
+{
+        struct gf_printer *printer = tmp;
+        int                written = 0;
+
+        written = gpprintf (printer, " option %s %s\n", k, v->data);
+        if (written == -1)
+                return -1;
+
+        printer->len += written;
+        return 0;
+}
+
+static int
+glusterfs_graph_print (struct gf_printer *gp, glusterfs_graph_t *graph)
+{
xlator_t *trav = NULL;
- data_pair_t *pair = NULL;
xlator_list_t *xch = NULL;
int ret = 0;
ssize_t len = 0;
@@ -105,11 +118,9 @@ glusterfs_graph_print (struct gf_printer *gp, glusterfs_graph_t *graph)
GPPRINTF (gp, "volume %s\n type %s\n", trav->name,
trav->type);
- for (pair = trav->options->members_list; pair && pair->next;
- pair = pair->next);
- for (; pair; pair = pair->prev)
- GPPRINTF (gp, " option %s %s\n", pair->key,
- pair->value->data);
+ ret = dict_foreach (trav->options, _print_volume_options, gp);
+ if (ret)
+ goto out;
if (trav->children) {
GPPRINTF (gp, " subvolumes");
@@ -126,6 +137,7 @@ glusterfs_graph_print (struct gf_printer *gp, glusterfs_graph_t *graph)
}
out:
+ len = gp->len;
if (ret == -1) {
gf_log ("graph-print", GF_LOG_ERROR, "printing failed");
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index 65cbb2e83..e76df1ca5 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -20,28 +20,23 @@
#include "defaults.h"
-
-
#if 0
static void
_gf_dump_details (int argc, char **argv)
{
extern FILE *gf_log_logfile;
int i = 0;
- char timestr[256];
+ char timestr[64];
time_t utime = 0;
- struct tm *tm = NULL;
pid_t mypid = 0;
struct utsname uname_buf = {{0, }, };
int uname_ret = -1;
- utime = time (NULL);
- tm = localtime (&utime);
mypid = getpid ();
uname_ret = uname (&uname_buf);
- /* Which git? What time? */
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ utime = time (NULL);
+ gf_time_fmt (timestr, sizeof timestr, utime, gf_timefmt_FT);
fprintf (gf_log_logfile,
"========================================"
"========================================\n");
@@ -122,7 +117,8 @@ glusterfs_graph_set_first (glusterfs_graph_t *graph, xlator_t *xl)
int
glusterfs_graph_insert (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
- const char *type, const char *name)
+ const char *type, const char *name,
+ gf_boolean_t autoload)
{
xlator_t *ixl = NULL;
@@ -148,6 +144,8 @@ glusterfs_graph_insert (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
if (!ixl->name)
goto err;
+ ixl->is_autoloaded = autoload;
+
if (xlator_set_type (ixl, type) == -1) {
gf_log ("glusterfs", GF_LOG_ERROR,
"%s (%s) initialization failed",
@@ -178,7 +176,7 @@ glusterfs_graph_acl (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
return 0;
ret = glusterfs_graph_insert (graph, ctx, "system/posix-acl",
- "posix-acl-autoload");
+ "posix-acl-autoload", 1);
return ret;
}
@@ -194,7 +192,7 @@ glusterfs_graph_worm (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
return 0;
ret = glusterfs_graph_insert (graph, ctx, "features/worm",
- "worm-autoload");
+ "worm-autoload", 1);
return ret;
}
@@ -210,11 +208,26 @@ glusterfs_graph_mac_compat (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
return 0;
ret = glusterfs_graph_insert (graph, ctx, "features/mac-compat",
- "mac-compat-autoload");
+ "mac-compat-autoload", 1);
return ret;
}
+/* Insert the "features/gfid-access" xlator, named "gfid-access-autoload"
+ * and flagged as autoloaded, when cmd_args aux_gfid_mount is set.
+ * Returns 0 when the option is unset, otherwise the result of
+ * glusterfs_graph_insert (). */
+int
+glusterfs_graph_gfid_access (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+        cmd_args_t *args = &ctx->cmd_args;
+
+        if (args->aux_gfid_mount)
+                return glusterfs_graph_insert (graph, ctx,
+                                               "features/gfid-access",
+                                               "gfid-access-autoload", 1);
+
+        return 0;
+}
static void
gf_add_cmdline_options (glusterfs_graph_t *graph, cmd_args_t *cmd_args)
@@ -301,7 +314,7 @@ glusterfs_graph_init (glusterfs_graph_t *graph)
}
-static void
+static int
_log_if_unknown_option (dict_t *dict, char *key, data_t *value, void *data)
{
volume_option_t *found = NULL;
@@ -316,7 +329,7 @@ _log_if_unknown_option (dict_t *dict, char *key, data_t *value, void *data)
"option '%s' is not recognized", key);
}
- return;
+ return 0;
}
@@ -340,8 +353,7 @@ fill_uuid (char *uuid, int size)
{
char hostname[256] = {0,};
struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
+ char now_str[64];
if (gettimeofday (&tv, NULL) == -1) {
gf_log ("graph", GF_LOG_ERROR,
@@ -355,8 +367,7 @@ fill_uuid (char *uuid, int size)
strerror (errno));
}
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
+ gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
snprintf (uuid, size, "%s-%d-%s:%"GF_PRI_SUSECONDS,
hostname, getpid(), now_str, tv.tv_usec);
@@ -445,6 +456,14 @@ glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
return -1;
}
+ /* XXX: gfid-access */
+ ret = glusterfs_graph_gfid_access (graph, ctx);
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR,
+ "glusterfs graph 'gfid-access' failed");
+ return -1;
+ }
+
/* XXX: this->ctx setting */
for (trav = graph->first; trav; trav = trav->next) {
trav->ctx = ctx;
@@ -496,11 +515,14 @@ glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
ctx->active = graph;
/* XXX: attach to master and set active pointer */
- if (ctx->master)
+ if (ctx->master) {
ret = xlator_notify (ctx->master, GF_EVENT_GRAPH_NEW, graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "graph new notification failed");
- return ret;
+ if (ret) {
+ gf_log ("graph", GF_LOG_ERROR,
+ "graph new notification failed");
+ return ret;
+ }
+ ((xlator_t *)ctx->master)->next = graph->top;
}
/* XXX: perform parent up */
@@ -513,18 +535,204 @@ glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
return 0;
}
+
+/* Recursively compare two xlator subtrees.
+ * Children are compared pairwise and in order first; after that the
+ * number of children, the xlator names and the xlator types must match.
+ * Returns 0 when the subtrees are equal and -1 otherwise (also when
+ * either argument is NULL). */
+int
+xlator_equal_rec (xlator_t *xl1, xlator_t *xl2)
+{
+        xlator_list_t *lhs = NULL;
+        xlator_list_t *rhs = NULL;
+        int            ret = 0;
+
+        if (!xl1 || !xl2) {
+                gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
+                return -1;
+        }
+
+        for (lhs = xl1->children, rhs = xl2->children;
+             lhs && rhs;
+             lhs = lhs->next, rhs = rhs->next) {
+                ret = xlator_equal_rec (lhs->xlator, rhs->xlator);
+                if (ret) {
+                        gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+                                "xlators children not equal");
+                        return ret;
+                }
+        }
+
+        /* one side still has children left over */
+        if (lhs || rhs)
+                return -1;
+
+        if (strcmp (xl1->name, xl2->name))
+                return -1;
+
+        /* type could have changed even if xlator names match,
+           e.g cluster/distribute and cluster/nufa share the same
+           xlator name
+        */
+        if (strcmp (xl1->type, xl2->type))
+                return -1;
+
+        return 0;
+}
+
+
+/* Tell whether two graphs have the same topology, i.e. whether
+ * xlator_equal_rec () finds the trees under graph1->first and
+ * graph2->first equal. The verdict is logged at DEBUG level.
+ * Returns _gf_true when they are equal, _gf_false otherwise. */
+gf_boolean_t
+is_graph_topology_equal (glusterfs_graph_t *graph1, glusterfs_graph_t *graph2)
+{
+        /* xlator_equal_rec () answers with 0 / -1; keep that in an int
+           rather than storing -1 in a gf_boolean_t */
+        int cmp = xlator_equal_rec (graph1->first, graph2->first);
+
+        if (cmp) {
+                gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+                        "graphs are not equal");
+                return _gf_false;
+        }
+
+        gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+                "graphs are equal");
+
+        return _gf_true;
+}
+
+
+/* Compare the currently known volfile with a new one and, when only
+ * options differ, reconfigure the old graph in place.
+ *
+ * oldvollen     - length of oldvolfile; 0 means a full INIT is needed
+ * newvolfile_fp - stream the new graph is constructed from
+ * ctx           - context whose ->active graph is used as the old graph
+ * oldvolfile    - old volfile text, parsed only when ctx->active is NULL
+ *
+ * Function has 3types of return value 0, -ve , 1
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
+ * return -1(or -ve) =======> Some Internal Error occurred during the operation
+ */
+int
+glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp,
+ glusterfs_ctx_t *ctx, const char *oldvolfile)
+{
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+ FILE *oldvolfile_fp = NULL;
+ gf_boolean_t active_graph_found = _gf_true;
+
+ int ret = -1;
+
+ if (!oldvollen) {
+ ret = 1; // Has to call INIT for the whole graph
+ goto out;
+ }
+
+ if (!ctx) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "ctx is NULL");
+ goto out;
+ }
+
+ oldvolfile_graph = ctx->active;
+ if (!oldvolfile_graph) {
+ /* No active graph: rebuild the old graph from the old volfile
+ text through a temporary file. Remember that it is owned here
+ so that it gets destroyed at out. */
+ active_graph_found = _gf_false;
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
+ "glusterfs_ctx->active is NULL");
+
+ oldvolfile_fp = tmpfile ();
+ if (!oldvolfile_fp) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, "Unable to "
+ "create temporary volfile: (%s)",
+ strerror (errno));
+ goto out;
+ }
+
+ /* write errors are picked up by the ferror () check below */
+ fwrite (oldvolfile, oldvollen, 1, oldvolfile_fp);
+ fflush (oldvolfile_fp);
+ if (ferror (oldvolfile_fp)) {
+ goto out;
+ }
+
+ oldvolfile_graph = glusterfs_graph_construct (oldvolfile_fp);
+ if (!oldvolfile_graph)
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct (newvolfile_fp);
+ if (!newvolfile_graph) {
+ goto out;
+ }
+
+ if (!is_graph_topology_equal (oldvolfile_graph,
+ newvolfile_graph)) {
+
+ ret = 1;
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Graph topology not equal(should call INIT)");
+ goto out;
+ }
+
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Only options have changed in the new "
+ "graph");
+
+ /* Same topology: apply the new graph's options to the old graph */
+ ret = glusterfs_graph_reconfigure (oldvolfile_graph,
+ newvolfile_graph);
+ if (ret) {
+ gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "Could not reconfigure new options in old graph");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (oldvolfile_fp)
+ fclose (oldvolfile_fp);
+
+ /* Do not simply destroy the old graph here. If the oldgraph
+ is constructed here in this function itself instead of getting
+ it from ctx->active (which happens only if ctx->active is NULL),
+ then destroy the old graph. If some i/o is still happening in
+ the old graph and the old graph is obtained from ctx->active,
+ then destroying the graph will cause problems.
+ */
+ if (!active_graph_found && oldvolfile_graph)
+ glusterfs_graph_destroy (oldvolfile_graph);
+ /* the new graph is always built here, so it is always released here */
+ if (newvolfile_graph)
+ glusterfs_graph_destroy (newvolfile_graph);
+
+ return ret;
+}
+
+
+/* Reconfigure oldgraph from newgraph by handing the first non-autoloaded
+ * xlator of each graph to xlator_tree_reconfigure () and returning its
+ * result. */
int
glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
glusterfs_graph_t *newgraph)
{
- xlator_t *old_xl = NULL;
- xlator_t *new_xl = NULL;
+ xlator_t *old_xl = NULL;
+ xlator_t *new_xl = NULL;
GF_ASSERT (oldgraph);
GF_ASSERT (newgraph);
old_xl = oldgraph->first;
+ /* Step past xlators flagged is_autoloaded (the flag is set in
+ glusterfs_graph_insert ()), following the first child each time.
+ NOTE(review): children is dereferenced unchecked - this assumes
+ every autoloaded xlator has at least one child. */
+ while (old_xl->is_autoloaded) {
+ old_xl = old_xl->children->xlator;
+ }
+
new_xl = newgraph->first;
+ /* same skip on the new graph */
+ while (new_xl->is_autoloaded) {
+ new_xl = new_xl->children->xlator;
+ }
return xlator_tree_reconfigure (old_xl, new_xl);
}
@@ -532,5 +740,12 @@ glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
+/* Free the xlator tree hanging off graph->first, unlink the graph from
+ * its list and release the graph itself. A NULL graph is ignored.
+ * Always returns 0. */
int
glusterfs_graph_destroy (glusterfs_graph_t *graph)
{
+        /* check graph before touching graph->first */
+        if (graph) {
+                xlator_tree_free (graph->first);
+                list_del_init (&graph->list);
+                GF_FREE (graph);
+        }
+
return 0;
}
diff --git a/libglusterfs/src/graph.l b/libglusterfs/src/graph.l
index 04fff2582..e4eba9cbe 100644
--- a/libglusterfs/src/graph.l
+++ b/libglusterfs/src/graph.l
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
%x STRING
%option yylineno
%option noinput
@@ -80,10 +70,10 @@ TYPE [t][y][p][e]
yyunput (0, NULL);
}
BEGIN (INITIAL);
- yylval = text;
+ graphyylval = text;
return STRING_TOK;
}
}
-[^ \t\r\n\"\\]+ { yylval = gf_strdup (yytext) ; return ID; }
+[^ \t\r\n\"\\]+ { graphyylval = gf_strdup (yytext) ; return ID; }
[ \t\r\n]+ ;
%%
diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
index 16ee2d43a..a220abeb9 100644
--- a/libglusterfs/src/graph.y
+++ b/libglusterfs/src/graph.y
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
%token VOLUME_BEGIN VOLUME_END OPTION NEWLINE SUBVOLUME ID WHITESPACE COMMENT TYPE STRING_TOK
%{
@@ -28,6 +18,7 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <pthread.h>
#define RELAX_POISONING
@@ -47,8 +38,8 @@ static void option_error (void);
#define YYSTYPE char *
#define GF_CMD_BUFFER_LEN (8 * GF_UNIT_KB)
-int yyerror (const char *);
-int yylex ();
+int graphyyerror (const char *);
+int graphyylex ();
%}
@@ -88,11 +79,11 @@ glusterfs_graph_t *construct;
static void
type_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
gf_log ("parser", GF_LOG_ERROR,
"Volume %s, before line %d: Please specify volume type",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -100,11 +91,11 @@ type_error (void)
static void
sub_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
gf_log ("parser", GF_LOG_ERROR,
"Volume %s, before line %d: Please specify subvolumes",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -112,12 +103,12 @@ sub_error (void)
static void
option_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
gf_log ("parser", GF_LOG_ERROR,
"Volume %s, before line %d: Please specify "
"option <key> <value>",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -125,7 +116,7 @@ option_error (void)
static int
new_volume (char *name)
{
- extern int yylineno;
+ extern int graphyylineno;
xlator_t *trav = NULL;
int ret = 0;
@@ -139,7 +130,7 @@ new_volume (char *name)
if (curr) {
gf_log ("parser", GF_LOG_ERROR,
"new volume (%s) defintion in line %d unexpected",
- name, yylineno);
+ name, graphyylineno);
ret = -1;
goto out;
}
@@ -159,7 +150,7 @@ new_volume (char *name)
if (!strcmp (name, trav->name)) {
gf_log ("parser", GF_LOG_ERROR,
"Line %d: volume '%s' defined again",
- yylineno, name);
+ graphyylineno, name);
ret = -1;
goto out;
}
@@ -204,7 +195,7 @@ out:
static int
volume_type (char *type)
{
- extern int yylineno;
+ extern int graphyylineno;
int32_t ret = 0;
if (!type) {
@@ -218,7 +209,7 @@ volume_type (char *type)
gf_log ("parser", GF_LOG_ERROR,
"Volume '%s', line %d: type '%s' is not valid or "
"not found on this machine",
- curr->name, yylineno, type);
+ curr->name, graphyylineno, type);
ret = -1;
goto out;
}
@@ -235,7 +226,7 @@ out:
static int
volume_option (char *key, char *value)
{
- extern int yylineno;
+ extern int graphyylineno;
int ret = 0;
char *set_value = NULL;
@@ -252,7 +243,7 @@ volume_option (char *key, char *value)
gf_log ("parser", GF_LOG_ERROR,
"Volume '%s', line %d: duplicate entry "
"('option %s') present",
- curr->name, yylineno, key);
+ curr->name, graphyylineno, key);
ret = -1;
goto out;
}
@@ -271,7 +262,7 @@ out:
static int
volume_sub (char *sub)
{
- extern int yylineno;
+ extern int graphyylineno;
xlator_t *trav = NULL;
int ret = 0;
@@ -293,7 +284,7 @@ volume_sub (char *sub)
gf_log ("parser", GF_LOG_ERROR,
"Volume '%s', line %d: subvolume '%s' is not defined "
"prior to usage",
- curr->name, yylineno, sub);
+ curr->name, graphyylineno, sub);
ret = -1;
goto out;
}
@@ -301,7 +292,7 @@ volume_sub (char *sub)
if (trav == curr) {
gf_log ("parser", GF_LOG_ERROR,
"Volume '%s', line %d: has '%s' itself as subvolume",
- curr->name, yylineno, sub);
+ curr->name, graphyylineno, sub);
ret = -1;
goto out;
}
@@ -338,46 +329,46 @@ volume_end (void)
int
-yywrap ()
+graphyywrap ()
{
return 1;
}
int
-yyerror (const char *str)
+graphyyerror (const char *str)
{
- extern char *yytext;
- extern int yylineno;
+ extern char *graphyytext;
+ extern int graphyylineno;
- if (curr && curr->name && yytext) {
- if (!strcmp (yytext, "volume")) {
+ if (curr && curr->name && graphyytext) {
+ if (!strcmp (graphyytext, "volume")) {
gf_log ("parser", GF_LOG_ERROR,
"'end-volume' not defined for volume '%s'",
curr->name);
- } else if (!strcmp (yytext, "type")) {
+ } else if (!strcmp (graphyytext, "type")) {
gf_log ("parser", GF_LOG_ERROR,
"line %d: duplicate 'type' defined for "
"volume '%s'",
- yylineno, curr->name);
- } else if (!strcmp (yytext, "subvolumes")) {
+ graphyylineno, curr->name);
+ } else if (!strcmp (graphyytext, "subvolumes")) {
gf_log ("parser", GF_LOG_ERROR,
"line %d: duplicate 'subvolumes' defined for "
"volume '%s'",
- yylineno, curr->name);
+ graphyylineno, curr->name);
} else if (curr) {
gf_log ("parser", GF_LOG_ERROR,
"syntax error: line %d (volume '%s'): \"%s\""
"\nallowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume'()",
- yylineno, curr->name,
- yytext);
+ graphyylineno, curr->name,
+ graphyytext);
} else {
gf_log ("parser", GF_LOG_ERROR,
"syntax error: line %d (just after volume "
"'%s'): \"%s\"\n(%s)",
- yylineno, curr->name,
- yytext,
+ graphyylineno, curr->name,
+ graphyytext,
"allowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume'");
}
@@ -386,7 +377,7 @@ yyerror (const char *str)
"syntax error in line %d: \"%s\" \n"
"(allowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume')\n",
- yylineno, yytext);
+ graphyylineno, graphyytext);
}
return -1;
@@ -491,6 +482,7 @@ preprocess (FILE *srcfp, FILE *dstfp)
cmd_buf_size *= 2;
cmd = GF_REALLOC (cmd, cmd_buf_size);
if (cmd == NULL) {
+ GF_FREE (result);
return -1;
}
@@ -532,6 +524,7 @@ preprocess (FILE *srcfp, FILE *dstfp)
out:
fseek (srcfp, 0L, SEEK_SET);
fseek (dstfp, 0L, SEEK_SET);
+
GF_FREE (cmd);
GF_FREE (result);
@@ -539,7 +532,7 @@ out:
}
-extern FILE *yyin;
+extern FILE *graphyyin;
glusterfs_graph_t *
glusterfs_graph_new ()
@@ -563,52 +556,65 @@ glusterfs_graph_t *
glusterfs_graph_construct (FILE *fp)
{
int ret = 0;
+ int tmp_fd = -1;
glusterfs_graph_t *graph = NULL;
- FILE *tmp_file = NULL;
+ FILE *tmp_file = NULL;
+ char template[PATH_MAX] = {0};
+ static pthread_mutex_t graph_mutex = PTHREAD_MUTEX_INITIALIZER;
graph = glusterfs_graph_new ();
if (!graph)
- return NULL;
+ goto err;
- tmp_file = tmpfile ();
+ strcpy (template, "/tmp/tmp.XXXXXX");
+ tmp_fd = mkstemp (template);
+ if (-1 == tmp_fd)
+ goto err;
- if (tmp_file == NULL) {
- gf_log ("parser", GF_LOG_ERROR,
- "cannot create temporary file");
+ ret = unlink (template);
+ if (ret < 0) {
+ gf_log ("parser", GF_LOG_WARNING, "Unable to delete file: %s",
+ template);
+ }
- glusterfs_graph_destroy (graph);
- return NULL;
- }
+ tmp_file = fdopen (tmp_fd, "w+b");
+ if (!tmp_file)
+ goto err;
- ret = preprocess (fp, tmp_file);
- if (ret < 0) {
- gf_log ("parser", GF_LOG_ERROR,
- "parsing of backticks failed");
+ ret = preprocess (fp, tmp_file);
+ if (ret < 0) {
+ gf_log ("parser", GF_LOG_ERROR, "parsing of backticks failed");
+ goto err;
+ }
- glusterfs_graph_destroy (graph);
- fclose (tmp_file);
- return NULL;
+ pthread_mutex_lock (&graph_mutex);
+ {
+ graphyyin = tmp_file;
+ construct = graph;
+ ret = yyparse ();
+ construct = NULL;
}
-
- yyin = tmp_file;
-
- construct = graph;
-
- ret = yyparse ();
-
- construct = NULL;
-
- fclose (tmp_file);
+ pthread_mutex_unlock (&graph_mutex);
if (ret == 1) {
gf_log ("parser", GF_LOG_DEBUG,
- "parsing of volfile failed, please review it "
- "once more");
-
- glusterfs_graph_destroy (graph);
- return NULL;
+ "parsing of volfile failed, please review it "
+ "once more");
+ goto err;
}
+ fclose (tmp_file);
return graph;
+err:
+ if (tmp_file) {
+ fclose (tmp_file);
+ } else {
+ gf_log ("parser", GF_LOG_ERROR, "cannot create temporary file");
+ if (-1 != tmp_fd)
+ close (tmp_fd);
+ }
+
+ glusterfs_graph_destroy (graph);
+ return NULL;
}
diff --git a/libglusterfs/src/iatt.h b/libglusterfs/src/iatt.h
index eb916ad5e..60ae59047 100644
--- a/libglusterfs/src/iatt.h
+++ b/libglusterfs/src/iatt.h
@@ -265,6 +265,24 @@ iatt_from_stat (struct iatt *iatt, struct stat *stat)
iatt->ia_blksize = stat->st_blksize;
iatt->ia_blocks = stat->st_blocks;
+ /* The backend FS (XFS, for example) may allocate blocks beyond EOF
+ for performance reasons, so 'st_blocks' can be larger than what the
+ file's data actually occupies. That would break logic inside
+ GlusterFS that relies on block counts (quota accounting, for
+ example), so the block count handed to the higher layers is capped
+ at the number of 512-byte blocks needed to hold 'ia_size'.
+ Currently, this logic won't work for sparse files (i.e. files with
+ holes).
+ */
+ {
+ uint64_t maxblocks;
+
+ maxblocks = (iatt->ia_size + 511) / 512;
+
+ if (iatt->ia_blocks > maxblocks)
+ iatt->ia_blocks = maxblocks;
+ }
+
iatt->ia_atime = stat->st_atime;
iatt->ia_atime_nsec = ST_ATIM_NSEC (stat);
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index e32eddb5d..15e0ccf78 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -134,8 +134,7 @@ __dentry_unset (dentry_t *dentry)
list_del_init (&dentry->inode_list);
- if (dentry->name)
- GF_FREE (dentry->name);
+ GF_FREE (dentry->name);
if (dentry->parent) {
__inode_unref (dentry->parent);
@@ -311,7 +310,7 @@ __inode_destroy (inode_t *inode)
goto noctx;
}
- for (index = 0; index < inode->table->xl->graph->xl_count; index++) {
+ for (index = 0; index < inode->table->ctxcount; index++) {
if (inode->_ctx[index].xl_key) {
xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
old_THIS = THIS;
@@ -529,10 +528,9 @@ __inode_create (inode_table_t *table)
INIT_LIST_HEAD (&newi->hash);
INIT_LIST_HEAD (&newi->dentry_list);
- newi->_ctx = GF_CALLOC (1, (sizeof (struct _inode_ctx) *
- table->xl->graph->xl_count),
+ newi->_ctx = GF_CALLOC (1,
+ (sizeof (struct _inode_ctx) * table->ctxcount),
gf_common_mt_inode_ctx);
-
if (newi->_ctx == NULL) {
LOCK_DESTROY (&newi->lock);
mem_put (newi);
@@ -830,14 +828,13 @@ __inode_link (inode_t *inode, inode_t *parent, const char *name,
if (uuid_is_null (iatt->ia_gfid))
return NULL;
- uuid_copy (inode->gfid, iatt->ia_gfid);
- inode->ia_type = iatt->ia_type;
-
- old_inode = __inode_find (table, inode->gfid);
+ old_inode = __inode_find (table, iatt->ia_gfid);
if (old_inode) {
link_inode = old_inode;
} else {
+ uuid_copy (inode->gfid, iatt->ia_gfid);
+ inode->ia_type = iatt->ia_type;
__inode_hash (inode);
}
}
@@ -950,6 +947,54 @@ inode_forget (inode_t *inode, uint64_t nlookup)
return 0;
}
+/*
+ * Invalidate an inode. This is invoked when a translator decides that an inode's
+ * cache is no longer valid. Any translator interested in taking action in this
+ * situation can define the invalidate callback.
+ */
+int
+inode_invalidate(inode_t *inode)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ if (!inode) {
+ gf_log_callingfn(THIS->name, GF_LOG_WARNING, "inode not found");
+ return -1;
+ }
+
+ /*
+ * The master xlator is not in the graph but it can define an invalidate
+ * handler.
+ */
+ xl = inode->table->xl->ctx->master;
+ if (xl && xl->cbks->invalidate) {
+ old_THIS = THIS;
+ THIS = xl;
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+ if (ret)
+ return ret;
+ }
+
+ xl = inode->table->xl->graph->first;
+ while (xl) {
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->invalidate)
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+
+ if (ret)
+ break;
+
+ xl = xl->next;
+ }
+
+ return ret;
+}
+
static void
__inode_unlink (inode_t *inode, inode_t *parent, const char *name)
@@ -1157,9 +1202,7 @@ __inode_path (inode_t *inode, const char *name, char **bufp)
out:
if (__is_root_gfid (inode->gfid) && !name) {
ret = 1;
- if (buf) {
- GF_FREE (buf);
- }
+ GF_FREE (buf);
buf = GF_CALLOC (ret + 1, sizeof (char), gf_common_mt_char);
if (buf) {
strcpy (buf, "/");
@@ -1255,8 +1298,8 @@ __inode_table_init_root (inode_table_t *table)
iatt.ia_ino = 1;
iatt.ia_type = IA_IFDIR;
- table->root = root;
__inode_link (root, NULL, NULL, &iatt);
+ table->root = root;
}
@@ -1272,6 +1315,7 @@ inode_table_new (size_t lru_limit, xlator_t *xl)
return NULL;
new->xl = xl;
+ new->ctxcount = xl->graph->xl_count + 1;
new->lru_limit = lru_limit;
@@ -1337,10 +1381,8 @@ inode_table_new (size_t lru_limit, xlator_t *xl)
out:
if (ret) {
if (new) {
- if (new->inode_hash)
- GF_FREE (new->inode_hash);
- if (new->name_hash)
- GF_FREE (new->name_hash);
+ GF_FREE (new->inode_hash);
+ GF_FREE (new->name_hash);
if (new->dentry_pool)
mem_pool_destroy (new->dentry_pool);
if (new->inode_pool)
@@ -1406,8 +1448,7 @@ inode_from_path (inode_table_t *itable, const char *path)
if (parent)
inode_unref (parent);
- if (pathname)
- GF_FREE (pathname);
+ GF_FREE (pathname);
out:
return inode;
@@ -1425,7 +1466,7 @@ __inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
if (!inode || !xlator)
return -1;
- for (index = 0; index < xlator->graph->xl_count; index++) {
+ for (index = 0; index < inode->table->ctxcount; index++) {
if (!inode->_ctx[index].xl_key) {
if (set_idx == -1)
set_idx = index;
@@ -1452,6 +1493,18 @@ out:
return ret;
}
+int
+__inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ return __inode_ctx_set2 (inode, xlator, value1_p, NULL);
+}
+
+int
+__inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ return __inode_ctx_set2 (inode, xlator, NULL, value2_p);
+}
+
int
inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
@@ -1471,34 +1524,97 @@ inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
return ret;
}
+int
+inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_set1 (inode, xlator, value2_p);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+int
+inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_set0 (inode, xlator, value1_p);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
int
__inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
uint64_t *value2)
{
int index = 0;
- int ret = 0;
+ int ret = -1;
if (!inode || !xlator)
- return -1;
+ goto out;
- for (index = 0; index < xlator->graph->xl_count; index++) {
+ for (index = 0; index < inode->table->ctxcount; index++) {
if (inode->_ctx[index].xl_key == xlator)
break;
}
- if (index == xlator->graph->xl_count) {
- ret = -1;
+ if (index == inode->table->ctxcount)
goto out;
+
+ if (inode->_ctx[index].value1) {
+ if (value1)
+ *value1 = inode->_ctx[index].value1;
+ ret = 0;
+ }
+ if (inode->_ctx[index].value2) {
+ if (value2)
+ *value2 = inode->_ctx[index].value2;
+ ret = 0;
}
+out:
+ return ret;
+}
- if (value1)
- *value1 = inode->_ctx[index].value1;
- if (value2)
- *value2 = inode->_ctx[index].value2;
-out:
+int
+__inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1)
+{
+ uint64_t tmp_value = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_get2 (inode, xlator, &tmp_value, NULL);
+ if (!ret)
+ *value1 = tmp_value;
+
+ return ret;
+}
+
+int
+__inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2)
+{
+ uint64_t tmp_value = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_get2 (inode, xlator, NULL, &tmp_value);
+ if (!ret)
+ *value2 = tmp_value;
+
return ret;
}
@@ -1521,6 +1637,40 @@ inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
return ret;
}
+int
+inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get1 (inode, xlator, value2);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+int
+inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ ret = __inode_ctx_get0 (inode, xlator, value1);
+ }
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
int
inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
@@ -1534,19 +1684,20 @@ inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
LOCK (&inode->lock);
{
- for (index = 0; index < xlator->graph->xl_count; index++) {
+ for (index = 0; index < inode->table->ctxcount;
+ index++) {
if (inode->_ctx[index].xl_key == xlator)
break;
}
- if (index == xlator->graph->xl_count) {
+ if (index == inode->table->ctxcount) {
ret = -1;
goto unlock;
}
- if (value1)
+ if (inode->_ctx[index].value1 && value1)
*value1 = inode->_ctx[index].value1;
- if (value2)
+ if (inode->_ctx[index].value2 && value2)
*value2 = inode->_ctx[index].value2;
inode->_ctx[index].key = 0;
@@ -1559,6 +1710,97 @@ unlock:
return ret;
}
+/* function behavior:
+ - if value1 is non-NULL and value1 in ctx is set (non-zero), value1 in ctx
+ is reset to 0 and its previous value is passed back in value1 address.
+ - if value2 is non-NULL and value2 in ctx is set (non-zero), value2 in ctx
+ is reset to 0 and its previous value is passed back in value2 address.
+ - if both are given, both fields are reset.
+*/
+static int
+__inode_ctx_reset2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK (&inode->lock);
+ {
+ for (index = 0; index < inode->table->ctxcount;
+ index++) {
+ if (inode->_ctx[index].xl_key == xlator)
+ break;
+ }
+
+ if (index == inode->table->ctxcount) {
+ ret = -1;
+ goto unlock;
+ }
+
+ if (inode->_ctx[index].value1 && value1) {
+ *value1 = inode->_ctx[index].value1;
+ inode->_ctx[index].value1 = 0;
+ }
+ if (inode->_ctx[index].value2 && value2) {
+ *value2 = inode->_ctx[index].value2;
+ inode->_ctx[index].value2 = 0;
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ return ret;
+}
+
+int
+inode_ctx_reset2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
+{
+ uint64_t tmp_value1 = 0;
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_reset2 (inode, xlator, &tmp_value1, &tmp_value2);
+ if (!ret) {
+ if (value1_p)
+ *value1_p = tmp_value1;
+ if (value2_p)
+ *value2_p = tmp_value2;
+ }
+ return ret;
+}
+
+int
+inode_ctx_reset1 (inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_reset2 (inode, xlator, NULL, &tmp_value2);
+
+ if (!ret && value2_p)
+ *value2_p = tmp_value2;
+
+ return ret;
+
+}
+int
+inode_ctx_reset0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ uint64_t tmp_value1 = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_reset2 (inode, xlator, &tmp_value1, NULL);
+
+ if (!ret && value1_p)
+ *value1_p = tmp_value1;
+
+ return ret;
+}
+
void
inode_dump (inode_t *inode, char *prefix)
@@ -1583,17 +1825,19 @@ inode_dump (inode_t *inode, char *prefix)
{
gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
+ gf_proc_dump_write("fd-count", "%u", inode->fd_count);
gf_proc_dump_write("ref", "%u", inode->ref);
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
if (inode->_ctx) {
- inode_ctx = GF_CALLOC (inode->table->xl->graph->xl_count,
+ inode_ctx = GF_CALLOC (inode->table->ctxcount,
sizeof (*inode_ctx),
gf_common_mt_inode_ctx);
if (inode_ctx == NULL) {
goto unlock;
}
- for (i = 0; i < inode->table->xl->graph->xl_count; i++) {
+ for (i = 0; i < inode->table->ctxcount;
+ i++) {
inode_ctx[i] = inode->_ctx[i];
}
}
@@ -1610,7 +1854,7 @@ unlock:
UNLOCK(&inode->lock);
if (inode_ctx && (dump_options.xl_options.dump_inodectx == _gf_true)) {
- for (i = 0; i < inode->table->xl->graph->xl_count; i++) {
+ for (i = 0; i < inode->table->ctxcount; i++) {
if (inode_ctx[i].xl_key) {
xl = (xlator_t *)(long)inode_ctx[i].xl_key;
if (xl->dumpops && xl->dumpops->inodectx)
@@ -1619,9 +1863,7 @@ unlock:
}
}
- if (inode_ctx != NULL) {
- GF_FREE (inode_ctx);
- }
+ GF_FREE (inode_ctx);
return;
}
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
index 41003df71..a88976265 100644
--- a/libglusterfs/src/inode.h
+++ b/libglusterfs/src/inode.h
@@ -56,6 +56,7 @@ struct _inode_table {
struct mem_pool *inode_pool; /* memory pool for inodes */
struct mem_pool *dentry_pool; /* memory pool for dentrys */
struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
+ int ctxcount; /* number of slots in inode->ctx */
};
@@ -72,10 +73,12 @@ struct _inode_ctx {
uint64_t key;
xlator_t *xl_key;
};
+ /* if value1 is 0, then the field is not set */
union {
uint64_t value1;
void *ptr1;
};
+ /* if value2 is 0, then the field is not set */
union {
uint64_t value2;
void *ptr2;
@@ -87,6 +90,7 @@ struct _inode {
uuid_t gfid;
gf_lock_t lock;
uint64_t nlookup;
+ uint32_t fd_count; /* Open fd count */
uint32_t ref; /* reference count on this inode */
ia_type_t ia_type; /* what kind of file */
struct list_head fd_list; /* list of open files on this inode */
@@ -131,6 +135,9 @@ int
inode_forget (inode_t *inode, uint64_t nlookup);
int
+inode_invalidate(inode_t *inode);
+
+int
inode_rename (inode_table_t *table, inode_t *olddir, const char *oldname,
inode_t *newdir, const char *newname,
inode_t *inode, struct iatt *stbuf);
@@ -154,6 +161,11 @@ __inode_path (inode_t *inode, const char *name, char **bufp);
inode_t *
inode_from_path (inode_table_t *table, const char *path);
+inode_t *
+inode_resolve (inode_table_t *table, char *path);
+
+/* deal with inode ctx's both values */
+
int
inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
uint64_t *value2);
@@ -172,29 +184,66 @@ int
inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
uint64_t *value2);
-inode_t *
-inode_resolve (inode_table_t *table, char *path);
+int
+inode_ctx_reset2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+/* deal with inode ctx's 1st value */
+
+int
+inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+__inode_ctx_set0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+int
+__inode_ctx_get0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_reset0 (inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+/* deal with inode ctx's 2nd value */
+
+int
+inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+__inode_ctx_set1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+int
+__inode_ctx_get1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_reset1 (inode_t *inode, xlator_t *xlator, uint64_t *value2);
-#define __inode_ctx_set(i,x,v_p) __inode_ctx_set2(i,x,v_p,0)
-#define inode_ctx_set(i,x,v_p) inode_ctx_set2(i,x,v_p,0)
static inline int
__inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
{
- return __inode_ctx_set2 (inode, this, &v, 0);
+ return __inode_ctx_set0 (inode, this, &v);
}
static inline int
inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
{
- return inode_ctx_set2(inode, this, &v, 0);
+ return inode_ctx_set0 (inode, this, &v);
}
-#define __inode_ctx_get(i,x,v) __inode_ctx_get2(i,x,v,0)
-#define inode_ctx_get(i,x,v) inode_ctx_get2(i,x,v,0)
+#define __inode_ctx_set(i,x,v_p) __inode_ctx_set0(i,x,v_p)
-#define inode_ctx_del(i,x,v) inode_ctx_del2(i,x,v,0)
+#define inode_ctx_set(i,x,v_p) inode_ctx_set0(i,x,v_p)
+#define inode_ctx_reset(i,x,v) inode_ctx_reset0(i,x,v)
+
+#define __inode_ctx_get(i,x,v) __inode_ctx_get0(i,x,v)
+
+#define inode_ctx_get(i,x,v) inode_ctx_get0(i,x,v)
+
+#define inode_ctx_del(i,x,v) inode_ctx_del2(i,x,v,0)
gf_boolean_t
__is_root_gfid (uuid_t gfid);
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index f68c6c748..a89e96267 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -562,8 +562,7 @@ iobuf_get_from_stdalloc (struct iobuf_pool *iobuf_pool, size_t page_size)
ret = 0;
out:
if (ret && iobuf) {
- if (iobuf->free_ptr)
- GF_FREE (iobuf->free_ptr);
+ GF_FREE (iobuf->free_ptr);
GF_FREE (iobuf);
iobuf = NULL;
}
@@ -842,6 +841,29 @@ out:
}
+void
+iobref_clear (struct iobref *iobref)
+{
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+
+ for (; i < GF_IOBREF_IOBUF_COUNT; i++) {
+ if (iobref->iobrefs[i] != NULL) {
+ iobuf_unref (iobref->iobrefs[i]);
+ } else {
+ /** iobufs are attached serially, so the first NULL slot ends the list */
+ break;
+ }
+ }
+
+ iobref_unref (iobref);
+
+ out:
+ return;
+}
+
+
int
__iobref_add (struct iobref *iobref, struct iobuf *iobuf)
{
diff --git a/libglusterfs/src/iobuf.h b/libglusterfs/src/iobuf.h
index b9c2a3807..5595309e1 100644
--- a/libglusterfs/src/iobuf.h
+++ b/libglusterfs/src/iobuf.h
@@ -150,7 +150,7 @@ struct iobref *iobref_ref (struct iobref *iobref);
void iobref_unref (struct iobref *iobref);
int iobref_add (struct iobref *iobref, struct iobuf *iobuf);
int iobref_merge (struct iobref *to, struct iobref *from);
-
+void iobref_clear (struct iobref *iobref);
size_t iobuf_size (struct iobuf *iobuf);
size_t iobref_size (struct iobref *iobref);
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index 58e8b9158..b22f72950 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -26,87 +26,87 @@ gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops, void
{
glusterfs_fop_t fop = -1;
- if (fops->stat == fn)
+ if (fops->stat == *(fop_stat_t *)&fn)
fop = GF_FOP_STAT;
- else if (fops->readlink == fn)
+ else if (fops->readlink == *(fop_readlink_t *)&fn)
fop = GF_FOP_READLINK;
- else if (fops->mknod == fn)
+ else if (fops->mknod == *(fop_mknod_t *)&fn)
fop = GF_FOP_MKNOD;
- else if (fops->mkdir == fn)
+ else if (fops->mkdir == *(fop_mkdir_t *)&fn)
fop = GF_FOP_MKDIR;
- else if (fops->unlink == fn)
+ else if (fops->unlink == *(fop_unlink_t *)&fn)
fop = GF_FOP_UNLINK;
- else if (fops->rmdir == fn)
+ else if (fops->rmdir == *(fop_rmdir_t *)&fn)
fop = GF_FOP_RMDIR;
- else if (fops->symlink == fn)
+ else if (fops->symlink == *(fop_symlink_t *)&fn)
fop = GF_FOP_SYMLINK;
- else if (fops->rename == fn)
+ else if (fops->rename == *(fop_rename_t *)&fn)
fop = GF_FOP_RENAME;
- else if (fops->link == fn)
+ else if (fops->link == *(fop_link_t *)&fn)
fop = GF_FOP_LINK;
- else if (fops->truncate == fn)
+ else if (fops->truncate == *(fop_truncate_t *)&fn)
fop = GF_FOP_TRUNCATE;
- else if (fops->open == fn)
+ else if (fops->open == *(fop_open_t *)&fn)
fop = GF_FOP_OPEN;
- else if (fops->readv == fn)
+ else if (fops->readv == *(fop_readv_t *)&fn)
fop = GF_FOP_READ;
- else if (fops->writev == fn)
+ else if (fops->writev == *(fop_writev_t *)&fn)
fop = GF_FOP_WRITE;
- else if (fops->statfs == fn)
+ else if (fops->statfs == *(fop_statfs_t *)&fn)
fop = GF_FOP_STATFS;
- else if (fops->flush == fn)
+ else if (fops->flush == *(fop_flush_t *)&fn)
fop = GF_FOP_FLUSH;
- else if (fops->fsync == fn)
+ else if (fops->fsync == *(fop_fsync_t *)&fn)
fop = GF_FOP_FSYNC;
- else if (fops->setxattr == fn)
+ else if (fops->setxattr == *(fop_setxattr_t *)&fn)
fop = GF_FOP_SETXATTR;
- else if (fops->getxattr == fn)
+ else if (fops->getxattr == *(fop_getxattr_t *)&fn)
fop = GF_FOP_GETXATTR;
- else if (fops->removexattr == fn)
+ else if (fops->removexattr == *(fop_removexattr_t *)&fn)
fop = GF_FOP_REMOVEXATTR;
- else if (fops->opendir == fn)
+ else if (fops->opendir == *(fop_opendir_t *)&fn)
fop = GF_FOP_OPENDIR;
- else if (fops->fsyncdir == fn)
+ else if (fops->fsyncdir == *(fop_fsyncdir_t *)&fn)
fop = GF_FOP_FSYNCDIR;
- else if (fops->access == fn)
+ else if (fops->access == *(fop_access_t *)&fn)
fop = GF_FOP_ACCESS;
- else if (fops->create == fn)
+ else if (fops->create == *(fop_create_t *)&fn)
fop = GF_FOP_CREATE;
- else if (fops->ftruncate == fn)
+ else if (fops->ftruncate == *(fop_ftruncate_t *)&fn)
fop = GF_FOP_FTRUNCATE;
- else if (fops->fstat == fn)
+ else if (fops->fstat == *(fop_fstat_t *)&fn)
fop = GF_FOP_FSTAT;
- else if (fops->lk == fn)
+ else if (fops->lk == *(fop_lk_t *)&fn)
fop = GF_FOP_LK;
- else if (fops->lookup == fn)
+ else if (fops->lookup == *(fop_lookup_t *)&fn)
fop = GF_FOP_LOOKUP;
- else if (fops->readdir == fn)
+ else if (fops->readdir == *(fop_readdir_t *)&fn)
fop = GF_FOP_READDIR;
- else if (fops->inodelk == fn)
+ else if (fops->inodelk == *(fop_inodelk_t *)&fn)
fop = GF_FOP_INODELK;
- else if (fops->finodelk == fn)
+ else if (fops->finodelk == *(fop_finodelk_t *)&fn)
fop = GF_FOP_FINODELK;
- else if (fops->entrylk == fn)
+ else if (fops->entrylk == *(fop_entrylk_t *)&fn)
fop = GF_FOP_ENTRYLK;
- else if (fops->fentrylk == fn)
+ else if (fops->fentrylk == *(fop_fentrylk_t *)&fn)
fop = GF_FOP_FENTRYLK;
- else if (fops->xattrop == fn)
+ else if (fops->xattrop == *(fop_xattrop_t *)&fn)
fop = GF_FOP_XATTROP;
- else if (fops->fxattrop == fn)
+ else if (fops->fxattrop == *(fop_fxattrop_t *)&fn)
fop = GF_FOP_FXATTROP;
- else if (fops->fgetxattr == fn)
+ else if (fops->fgetxattr == *(fop_fgetxattr_t *)&fn)
fop = GF_FOP_FGETXATTR;
- else if (fops->fsetxattr == fn)
+ else if (fops->fsetxattr == *(fop_fsetxattr_t *)&fn)
fop = GF_FOP_FSETXATTR;
- else if (fops->rchecksum == fn)
+ else if (fops->rchecksum == *(fop_rchecksum_t *)&fn)
fop = GF_FOP_RCHECKSUM;
- else if (fops->setattr == fn)
+ else if (fops->setattr == *(fop_setattr_t *)&fn)
fop = GF_FOP_SETATTR;
- else if (fops->fsetattr == fn)
+ else if (fops->fsetattr == *(fop_fsetattr_t *)&fn)
fop = GF_FOP_FSETATTR;
- else if (fops->readdirp == fn)
+ else if (fops->readdirp == *(fop_readdirp_t *)&fn)
fop = GF_FOP_READDIRP;
- else if (fops->getspec == fn)
+ else if (fops->getspec == *(fop_getspec_t *)&fn)
fop = GF_FOP_GETSPEC;
else
fop = -1;
@@ -136,6 +136,22 @@ gf_update_latency (call_frame_t *frame)
lat->mean = lat->mean + (elapsed - lat->mean) / lat->count;
}
+void
+gf_latency_begin (call_frame_t *frame, void *fn)
+{
+ gf_set_fop_from_fn_pointer (frame, frame->this->fops, fn);
+
+ gettimeofday (&frame->begin, NULL);
+}
+
+
+void
+gf_latency_end (call_frame_t *frame)
+{
+ gettimeofday (&frame->end, NULL);
+
+ gf_update_latency (frame);
+}
void
gf_proc_dump_latency_info (xlator_t *xl)
@@ -148,23 +164,22 @@ gf_proc_dump_latency_info (xlator_t *xl)
gf_proc_dump_add_section (key_prefix);
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- gf_proc_dump_build_key (key, key_prefix, gf_fop_list[i]);
+ gf_proc_dump_build_key (key, key_prefix,
+ (char *)gf_fop_list[i]);
gf_proc_dump_write (key, "%.03f,%"PRId64",%.03f",
xl->latencies[i].mean,
xl->latencies[i].count,
xl->latencies[i].total);
}
+
+ memset (xl->latencies, 0, sizeof (xl->latencies));
}
void
-gf_latency_toggle (int signum)
+gf_latency_toggle (int signum, glusterfs_ctx_t *ctx)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = glusterfs_ctx_get ();
-
if (ctx) {
ctx->measure_latency = !ctx->measure_latency;
gf_log ("[core]", GF_LOG_INFO,
diff --git a/libglusterfs/src/latency.h b/libglusterfs/src/latency.h
index 16c5994b0..81acbf484 100644
--- a/libglusterfs/src/latency.h
+++ b/libglusterfs/src/latency.h
@@ -11,6 +11,7 @@
#ifndef __LATENCY_H__
#define __LATENCY_H__
+#include "glusterfs.h"
typedef struct fop_latency {
uint64_t min; /* min time for the call (microseconds) */
@@ -22,6 +23,6 @@ typedef struct fop_latency {
} fop_latency_t;
void
-gf_latency_toggle (int signum);
+gf_latency_toggle (int signum, glusterfs_ctx_t *ctx);
#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h
index 35fccdf25..392c22ceb 100644
--- a/libglusterfs/src/list.h
+++ b/libglusterfs/src/list.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -11,7 +11,6 @@
#ifndef _LLIST_H
#define _LLIST_H
-
struct list_head {
struct list_head *next;
struct list_head *prev;
@@ -45,6 +44,31 @@ list_add_tail (struct list_head *new, struct list_head *head)
}
+/* Insert an element into the list in sorted order.
+ The order is determined by the compare function provided as input.
+ For ascending order, compare should return:
+ 0: if both the arguments are equal
+ >0: if first argument is greater than second argument
+ <0: if first argument is less than second argument */
+static inline void
+list_add_order (struct list_head *new, struct list_head *head,
+ int (*compare)(struct list_head *, struct list_head *))
+{
+ struct list_head *pos = head->prev;
+
+ while ( pos != head ) {
+ if (compare(new, pos) >= 0)
+ break;
+
+ /* Iterate the list in the reverse order. This will have
+ better efficiency if the elements are inserted in the
+ ascending order */
+ pos = pos->prev;
+ }
+
+ list_add (new, pos);
+}
+
static inline void
list_del (struct list_head *old)
{
@@ -175,4 +199,16 @@ list_append_init (struct list_head *list, struct list_head *head)
&pos->member != (head); \
pos = n, n = list_entry(n->member.next, typeof(*n), member))
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member), \
+ n = list_entry(pos->member.prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
#endif /* _LLIST_H */
diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
index c47237f82..5deb90cda 100644
--- a/libglusterfs/src/logging.c
+++ b/libglusterfs/src/logging.c
@@ -22,9 +22,22 @@
#include <string.h>
#include <stdlib.h>
+#ifdef GF_USE_SYSLOG
+#include <libintl.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include "gf-error-codes.h"
+
+#define GF_JSON_MSG_LENGTH 8192
+#define GF_SYSLOG_CEE_FORMAT \
+ "@cee: {\"msg\": \"%s\", \"gf_code\": \"%u\", \"gf_message\": \"%s\"}"
+#define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf"
+#endif /* GF_USE_SYSLOG */
+
#include "xlator.h"
#include "logging.h"
#include "defaults.h"
+#include "glusterfs.h"
#ifdef GF_LINUX_HOST_OS
#include <syslog.h>
@@ -34,50 +47,44 @@
#include <execinfo.h>
#endif
+/* Ideally this should get moved to logging.h */
+struct _msg_queue {
+ struct list_head msgs;
+};
-static pthread_mutex_t logfile_mutex;
-static char *filename = NULL;
-static uint8_t logrotate = 0;
-static FILE *logfile = NULL;
-static gf_loglevel_t loglevel = GF_LOG_INFO;
-static int gf_log_syslog = 1;
-static gf_loglevel_t sys_log_level = GF_LOG_CRITICAL;
-
-char gf_log_xl_log_set;
-gf_loglevel_t gf_log_loglevel = GF_LOG_INFO; /* extern'd */
-FILE *gf_log_logfile;
-
-static char *cmd_log_filename = NULL;
-static FILE *cmdlogfile = NULL;
+struct _log_msg {
+ const char *msg;
+ struct list_head queue;
+};
void
gf_log_logrotate (int signum)
{
- logrotate = 1;
+ THIS->ctx->log.logrotate = 1;
}
void
gf_log_enable_syslog (void)
{
- gf_log_syslog = 1;
+ THIS->ctx->log.gf_log_syslog = 1;
}
void
gf_log_disable_syslog (void)
{
- gf_log_syslog = 0;
+ THIS->ctx->log.gf_log_syslog = 0;
}
gf_loglevel_t
gf_log_get_loglevel (void)
{
- return loglevel;
+ return THIS->ctx->log.loglevel;
}
void
gf_log_set_loglevel (gf_loglevel_t level)
{
- gf_log_loglevel = loglevel = level;
+ THIS->ctx->log.loglevel = level;
}
@@ -96,47 +103,283 @@ gf_log_set_xl_loglevel (void *this, gf_loglevel_t level)
xlator_t *xl = this;
if (!xl)
return;
- gf_log_xl_log_set = 1;
+ xl->ctx->log.gf_log_xl_log_set = 1;
xl->loglevel = level;
}
void
gf_log_fini (void)
{
- pthread_mutex_destroy (&logfile_mutex);
+ pthread_mutex_destroy (&THIS->ctx->log.logfile_mutex);
+}
+
+
+#ifdef GF_USE_SYSLOG
+/**
+ * gf_get_error_message -function to get error message for given error code
+ * @error_code: error code defined by log book
+ *
+ * @return: success: string
+ * failure: NULL
+ */
+const char *
+gf_get_error_message (int error_code) {
+ return _gf_get_message (error_code);
+}
+
+
+/**
+ * gf_openlog -function to open syslog specific to gluster; gf_log_init()
+ * calls it only when the file /etc/glusterfs/logger.conf exists
+ * @ident: optional identification string similar to openlog()
+ * @option: optional value to option to openlog(). Passing -1 uses
+ * 'LOG_PID | LOG_NDELAY' as default
+ * @facility: optional facility code similar to openlog(). Passing -1
+ * uses LOG_LOCAL1 as default
+ *
+ * @return: void
+ */
+void
+gf_openlog (const char *ident, int option, int facility)
+{
+ int _option = option;
+ int _facility = facility;
+
+ if (-1 == _option) {
+ _option = LOG_PID | LOG_NDELAY;
+ }
+ if (-1 == _facility) {
+ _facility = LOG_LOCAL1;
+ }
+
+ setlocale(LC_ALL, "");
+ bindtextdomain("gluster", "/usr/share/locale");
+ textdomain("gluster");
+
+ openlog(ident, _option, _facility);
+}
+
+
+/**
+ * _json_escape -function to convert string to json encoded string
+ * @str: input string
+ * @buf: buffer to store encoded string
+ * @len: length of @buf
+ *
+ * @return: success: pointer to the first unprocessed character in @str
+ * failure: NULL
+ *
+ * Internal function. Heavily inspired by _ul_str_escape() function in
+ * libumberlog
+ *
+ * Sample output:
+ * [1] str = "devel error"
+ * buf = "devel error"
+ * [2] str = "devel<TAB>error"
+ * buf = "devel\terror"
+ * [3] str = "I/O error on "/tmp/foo" file"
+ * buf = "I/O error on \"/tmp/foo\" file"
+ * [4] str = "I/O error<ESC>on /tmp/bar file"
+ * buf = "I/O error\u001bon /tmp/bar file"
+ *
+ */
+char *
+_json_escape(const char *str, char *buf, size_t len)
+{
+ static const unsigned char json_exceptions[UCHAR_MAX + 1] =
+ {
+ [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1,
+ [0x05] = 1, [0x06] = 1, [0x07] = 1, [0x08] = 1,
+ [0x09] = 1, [0x0a] = 1, [0x0b] = 1, [0x0c] = 1,
+ [0x0d] = 1, [0x0e] = 1, [0x0f] = 1, [0x10] = 1,
+ [0x11] = 1, [0x12] = 1, [0x13] = 1, [0x14] = 1,
+ [0x15] = 1, [0x16] = 1, [0x17] = 1, [0x18] = 1,
+ [0x19] = 1, [0x1a] = 1, [0x1b] = 1, [0x1c] = 1,
+ [0x1d] = 1, [0x1e] = 1, [0x1f] = 1,
+ ['\\'] = 1, ['"'] = 1
+ };
+ static const char json_hex_chars[16] = "0123456789abcdef";
+ unsigned char *p = NULL;
+ size_t pos = 0;
+
+ if (!str || !buf || len <= 0) {
+ return NULL;
+ }
+
+ for (p = (unsigned char *)str;
+ *p && (pos + 1) < len;
+ p++)
+ {
+ if (json_exceptions[*p] == 0) {
+ buf[pos++] = *p;
+ continue;
+ }
+
+ if ((pos + 2) >= len) {
+ break;
+ }
+
+ switch (*p)
+ {
+ case '\b':
+ buf[pos++] = '\\';
+ buf[pos++] = 'b';
+ break;
+ case '\n':
+ buf[pos++] = '\\';
+ buf[pos++] = 'n';
+ break;
+ case '\r':
+ buf[pos++] = '\\';
+ buf[pos++] = 'r';
+ break;
+ case '\t':
+ buf[pos++] = '\\';
+ buf[pos++] = 't';
+ break;
+ case '\\':
+ buf[pos++] = '\\';
+ buf[pos++] = '\\';
+ break;
+ case '"':
+ buf[pos++] = '\\';
+ buf[pos++] = '"';
+ break;
+ default:
+ if ((pos + 6) >= len) {
+ buf[pos] = '\0';
+ return (char *)p;
+ }
+ buf[pos++] = '\\';
+ buf[pos++] = 'u';
+ buf[pos++] = '0';
+ buf[pos++] = '0';
+ buf[pos++] = json_hex_chars[(*p) >> 4];
+ buf[pos++] = json_hex_chars[(*p) & 0xf];
+ break;
+ }
+ }
+
+ buf[pos] = '\0';
+ return (char *)p;
}
+/**
+ * gf_syslog -function to submit message to syslog specific to gluster
+ * @error_code: error code defined by log book
+ * @facility_priority: facility_priority of syslog()
+ * @format: optional format string to syslog()
+ *
+ * @return: void
+ */
void
-gf_log_globals_init (void)
+gf_syslog (int error_code, int facility_priority, char *format, ...)
{
- pthread_mutex_init (&logfile_mutex, NULL);
+ char *msg = NULL;
+ char json_msg[GF_JSON_MSG_LENGTH];
+ GF_UNUSED char *p = NULL;
+ const char *error_message = NULL;
+ char json_error_message[GF_JSON_MSG_LENGTH];
+ va_list ap;
+
+ error_message = gf_get_error_message (error_code);
+
+ va_start (ap, format);
+ if (format) {
+ vasprintf (&msg, format, ap);
+ p = _json_escape (msg, json_msg, GF_JSON_MSG_LENGTH);
+ if (error_message) {
+ p = _json_escape (error_message, json_error_message,
+ GF_JSON_MSG_LENGTH);
+ syslog (facility_priority, GF_SYSLOG_CEE_FORMAT,
+ json_msg, error_code, json_error_message);
+ } else {
+ /* no error message is registered for this error code, so
+ ignore the code and fall back to plain syslog */
+ syslog (facility_priority, "%s", msg);
+ }
+ free (msg);
+ } else {
+ if (error_message) {
+ /* no user message: treat error_message as msg */
+ syslog (facility_priority, GF_SYSLOG_CEE_FORMAT,
+ json_error_message, error_code,
+ json_error_message);
+ } else {
+ /* cannot produce a log entry: neither error_message nor
+ msg is available */
+ }
+ }
+ va_end (ap);
+}
+#endif /* GF_USE_SYSLOG */
+void
+gf_log_globals_init (void *data)
+{
+ glusterfs_ctx_t *ctx = data;
+
+ pthread_mutex_init (&ctx->log.logfile_mutex, NULL);
+
+ ctx->log.loglevel = GF_LOG_INFO;
+ ctx->log.gf_log_syslog = 1;
+ ctx->log.sys_log_level = GF_LOG_CRITICAL;
+
+#ifndef GF_USE_SYSLOG
#ifdef GF_LINUX_HOST_OS
/* For the 'syslog' output. one can grep 'GlusterFS' in syslog
for serious logs */
openlog ("GlusterFS", LOG_PID, LOG_DAEMON);
#endif
+#endif
}
int
-gf_log_init (const char *file)
+gf_log_init (void *data, const char *file, const char *ident)
{
+ glusterfs_ctx_t *ctx = NULL;
int fd = -1;
+ ctx = data;
+
+#if defined(GF_USE_SYSLOG)
+ {
+ /* use default ident and option */
+ /* TODO: make FACILITY configurable rather than hard-coding LOG_DAEMON */
+ struct stat buf;
+
+ if (stat (GF_LOG_CONTROL_FILE, &buf) == 0) {
+ /* use syslog logging */
+ ctx->log.log_control_file_found = 1;
+ if (ident) {
+ /* keep our own copy of ident for the life of the */
+ /* log: syslog uses this value on every logging call */
+ ctx->log.ident = gf_strdup (ident);
+ gf_openlog (ctx->log.ident, -1, LOG_DAEMON);
+ } else {
+ gf_openlog (NULL, -1, LOG_DAEMON);
+ }
+ } else {
+ /* use old style logging */
+ ctx->log.log_control_file_found = 0;
+ }
+ }
+#endif
+
if (!file){
fprintf (stderr, "ERROR: no filename specified\n");
return -1;
}
if (strcmp (file, "-") == 0) {
- gf_log_logfile = stderr;
-
+ ctx->log.gf_log_logfile = stderr;
+ ctx->log.logfile = stderr;
return 0;
}
- filename = gf_strdup (file);
- if (!filename) {
+ ctx->log.filename = gf_strdup (file);
+ if (!ctx->log.filename) {
fprintf (stderr, "ERROR: updating log-filename failed: %s\n",
strerror (errno));
return -1;
@@ -150,54 +393,22 @@ gf_log_init (const char *file)
}
close (fd);
- logfile = fopen (file, "a");
- if (!logfile){
+ ctx->log.logfile = fopen (file, "a");
+ if (!ctx->log.logfile){
fprintf (stderr, "ERROR: failed to open logfile \"%s\" (%s)\n",
file, strerror (errno));
return -1;
}
- gf_log_logfile = logfile;
+ ctx->log.gf_log_logfile = ctx->log.logfile;
return 0;
}
-
-
-struct _msg_queue {
- struct list_head msgs;
-};
-
-struct _log_msg {
- const char *msg;
- struct list_head queue;
-};
-
-
-void
-gf_log_lock (void)
-{
- pthread_mutex_lock (&logfile_mutex);
-}
-
-
-void
-gf_log_unlock (void)
-{
- pthread_mutex_unlock (&logfile_mutex);
-}
-
-
-void
-gf_log_cleanup (void)
-{
- pthread_mutex_destroy (&logfile_mutex);
-}
-
void
set_sys_log_level (gf_loglevel_t level)
{
- sys_log_level = level;
+ THIS->ctx->log.sys_log_level = level;
}
int
@@ -206,22 +417,23 @@ _gf_log_nomem (const char *domain, const char *file,
size_t size)
{
const char *basename = NULL;
- struct tm *tm = NULL;
xlator_t *this = NULL;
struct timeval tv = {0,};
int ret = 0;
- char msg[8092];
- char timestr[256];
- char callstr[4096];
+ char msg[8092] = {0,};
+ char timestr[256] = {0,};
+ char callstr[4096] = {0,};
+ glusterfs_ctx_t *ctx = NULL;
this = THIS;
+ ctx = this->ctx;
- if (gf_log_xl_log_set) {
+ if (ctx->log.gf_log_xl_log_set) {
if (this->loglevel && (level > this->loglevel))
goto out;
- else if (level > gf_log_loglevel)
- goto out;
}
+ if (level > ctx->log.loglevel)
+ goto out;
static char *level_strings[] = {"", /* NONE */
"M", /* EMERGENCY */
@@ -242,6 +454,12 @@ _gf_log_nomem (const char *domain, const char *file,
return -1;
}
+ basename = strrchr (file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
#if HAVE_BACKTRACE
/* Print 'calling function' */
do {
@@ -268,36 +486,45 @@ _gf_log_nomem (const char *domain, const char *file,
} while (0);
#endif /* HAVE_BACKTRACE */
+#if defined(GF_USE_SYSLOG)
+ if (ctx->log.log_control_file_found)
+ {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG; every
+ other level maps to syslog priority (level - 1) */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
+ }
+ gf_syslog (GF_ERR_DEV, priority,
+ "[%s:%d:%s] %s %s: no memory "
+ "available for size (%"GF_PRI_SIZET")",
+ basename, line, function, callstr, domain,
+ size);
+ goto out;
+ }
+#endif /* GF_USE_SYSLOG */
ret = gettimeofday (&tv, NULL);
if (-1 == ret)
goto out;
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
- tm = localtime (&tv.tv_sec);
+ ret = sprintf (msg, "[%s] %s [%s:%d:%s] %s %s: no memory "
+ "available for size (%"GF_PRI_SIZET")",
+ timestr, level_strings[level],
+ basename, line, function, callstr,
+ domain, size);
+ if (-1 == ret) {
+ goto out;
+ }
- pthread_mutex_lock (&logfile_mutex);
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
{
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
-
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = sprintf (msg, "[%s] %s [%s:%d:%s] %s %s: no memory "
- "available for size (%"GF_PRI_SIZET")",
- timestr, level_strings[level],
- basename, line, function, callstr,
- domain, size);
- if (-1 == ret) {
- goto unlock;
- }
-
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
+ if (ctx->log.logfile) {
+ fprintf (ctx->log.logfile, "%s\n", msg);
} else {
fprintf (stderr, "%s\n", msg);
}
@@ -305,13 +532,13 @@ _gf_log_nomem (const char *domain, const char *file,
#ifdef GF_LINUX_HOST_OS
/* We want only serious log in 'syslog', not our debug
and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
syslog ((level-1), "%s\n", msg);
#endif
}
-unlock:
- pthread_mutex_unlock (&logfile_mutex);
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
out:
return ret;
}
@@ -321,7 +548,6 @@ _gf_log_callingfn (const char *domain, const char *file, const char *function,
int line, gf_loglevel_t level, const char *fmt, ...)
{
const char *basename = NULL;
- struct tm *tm = NULL;
xlator_t *this = NULL;
char *str1 = NULL;
char *str2 = NULL;
@@ -332,15 +558,17 @@ _gf_log_callingfn (const char *domain, const char *file, const char *function,
size_t len = 0;
int ret = 0;
va_list ap;
+ glusterfs_ctx_t *ctx = NULL;
this = THIS;
+ ctx = this->ctx;
- if (gf_log_xl_log_set) {
+ if (ctx->log.gf_log_xl_log_set) {
if (this->loglevel && (level > this->loglevel))
goto out;
- else if (level > gf_log_loglevel)
- goto out;
}
+ if (level > ctx->log.loglevel)
+ goto out;
static char *level_strings[] = {"", /* NONE */
"M", /* EMERGENCY */
@@ -361,6 +589,12 @@ _gf_log_callingfn (const char *domain, const char *file, const char *function,
return -1;
}
+ basename = strrchr (file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
#if HAVE_BACKTRACE
/* Print 'calling function' */
do {
@@ -387,50 +621,65 @@ _gf_log_callingfn (const char *domain, const char *file, const char *function,
} while (0);
#endif /* HAVE_BACKTRACE */
+#if defined(GF_USE_SYSLOG)
+ if (ctx->log.log_control_file_found)
+ {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG; every
+ other level maps to syslog priority (level - 1) */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
+ }
+
+ va_start (ap, fmt);
+ vasprintf (&str2, fmt, ap);
+ va_end (ap);
+
+ gf_syslog (GF_ERR_DEV, priority,
+ "[%s:%d:%s] %s %d-%s: %s",
+ basename, line, function,
+ callstr,
+ ((this->graph) ? this->graph->id:0), domain,
+ str2);
+
+ goto out;
+ }
+#endif /* GF_USE_SYSLOG */
ret = gettimeofday (&tv, NULL);
if (-1 == ret)
goto out;
+ va_start (ap, fmt);
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
- tm = localtime (&tv.tv_sec);
-
- pthread_mutex_lock (&logfile_mutex);
- {
- va_start (ap, fmt);
-
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
-
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %s %d-%s: ",
- timestr, level_strings[level],
- basename, line, function, callstr,
- ((this->graph) ? this->graph->id:0), domain);
- if (-1 == ret) {
- goto unlock;
- }
+ ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %s %d-%s: ",
+ timestr, level_strings[level],
+ basename, line, function, callstr,
+ ((this->graph) ? this->graph->id:0), domain);
+ if (-1 == ret) {
+ goto out;
+ }
- ret = vasprintf (&str2, fmt, ap);
- if (-1 == ret) {
- goto unlock;
- }
+ ret = vasprintf (&str2, fmt, ap);
+ if (-1 == ret) {
+ goto out;
+ }
- va_end (ap);
+ va_end (ap);
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+ len = strlen (str1);
+ msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ strcpy (msg, str1);
+ strcpy (msg + len, str2);
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf (ctx->log.logfile, "%s\n", msg);
} else {
fprintf (stderr, "%s\n", msg);
}
@@ -438,25 +687,21 @@ _gf_log_callingfn (const char *domain, const char *file, const char *function,
#ifdef GF_LINUX_HOST_OS
/* We want only serious log in 'syslog', not our debug
and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
syslog ((level-1), "%s\n", msg);
#endif
}
-unlock:
- pthread_mutex_unlock (&logfile_mutex);
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
- if (msg) {
- GF_FREE (msg);
- }
+out:
+ GF_FREE (msg);
- if (str1)
- GF_FREE (str1);
+ GF_FREE (str1);
- if (str2)
- FREE (str2);
+ FREE (str2);
-out:
return ret;
}
@@ -464,29 +709,29 @@ int
_gf_log (const char *domain, const char *file, const char *function, int line,
gf_loglevel_t level, const char *fmt, ...)
{
- const char *basename = NULL;
- FILE *new_logfile = NULL;
- va_list ap;
- struct tm *tm = NULL;
- char timestr[256];
+ const char *basename = NULL;
+ FILE *new_logfile = NULL;
+ va_list ap;
+ char timestr[256] = {0,};
struct timeval tv = {0,};
-
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- size_t len = 0;
- int ret = 0;
- int fd = -1;
- xlator_t *this = NULL;
+ char *str1 = NULL;
+ char *str2 = NULL;
+ char *msg = NULL;
+ size_t len = 0;
+ int ret = 0;
+ int fd = -1;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
this = THIS;
+ ctx = this->ctx;
- if (gf_log_xl_log_set) {
+ if (ctx->log.gf_log_xl_log_set) {
if (this->loglevel && (level > this->loglevel))
goto out;
- else if (level > gf_log_loglevel)
- goto out;
}
+ if (level > ctx->log.loglevel)
+ goto out;
static char *level_strings[] = {"", /* NONE */
"M", /* EMERGENCY */
@@ -507,11 +752,41 @@ _gf_log (const char *domain, const char *file, const char *function, int line,
return -1;
}
+ basename = strrchr (file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
- if (logrotate) {
- logrotate = 0;
+#if defined(GF_USE_SYSLOG)
+ if (ctx->log.log_control_file_found)
+ {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG; every
+ other level maps to syslog priority (level - 1) */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
+ }
- fd = open (filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ va_start (ap, fmt);
+ vasprintf (&str2, fmt, ap);
+ va_end (ap);
+
+ gf_syslog (GF_ERR_DEV, priority,
+ "[%s:%d:%s] %d-%s: %s",
+ basename, line, function,
+ ((this->graph) ? this->graph->id:0), domain, str2);
+ goto err;
+ }
+#endif /* GF_USE_SYSLOG */
+
+ if (ctx->log.logrotate) {
+ ctx->log.logrotate = 0;
+
+ fd = open (ctx->log.filename,
+ O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
gf_log ("logrotate", GF_LOG_ERROR,
"%s", strerror (errno));
@@ -519,102 +794,135 @@ _gf_log (const char *domain, const char *file, const char *function, int line,
}
close (fd);
- new_logfile = fopen (filename, "a");
+ new_logfile = fopen (ctx->log.filename, "a");
if (!new_logfile) {
gf_log ("logrotate", GF_LOG_CRITICAL,
"failed to open logfile %s (%s)",
- filename, strerror (errno));
+ ctx->log.filename, strerror (errno));
goto log;
}
- if (logfile)
- fclose (logfile);
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile)
+ fclose (ctx->log.logfile);
+
+ ctx->log.gf_log_logfile = ctx->log.logfile = new_logfile;
+ }
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
- gf_log_logfile = logfile = new_logfile;
}
log:
ret = gettimeofday (&tv, NULL);
if (-1 == ret)
goto out;
+ va_start (ap, fmt);
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
- tm = localtime (&tv.tv_sec);
-
- pthread_mutex_lock (&logfile_mutex);
- {
- va_start (ap, fmt);
+ ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %d-%s: ",
+ timestr, level_strings[level],
+ basename, line, function,
+ ((this->graph)?this->graph->id:0), domain);
+ if (-1 == ret) {
+ goto err;
+ }
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
-
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %d-%s: ",
- timestr, level_strings[level],
- basename, line, function,
- ((this->graph)?this->graph->id:0), domain);
- if (-1 == ret) {
- goto unlock;
- }
+ ret = vasprintf (&str2, fmt, ap);
+ if (-1 == ret) {
+ goto err;
+ }
- ret = vasprintf (&str2, fmt, ap);
- if (-1 == ret) {
- goto unlock;
- }
+ va_end (ap);
- va_end (ap);
+ len = strlen (str1);
+ msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+ strcpy (msg, str1);
+ strcpy (msg + len, str2);
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ pthread_mutex_lock (&ctx->log.logfile_mutex);
+ {
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
+ if (ctx->log.logfile) {
+ fprintf (ctx->log.logfile, "%s\n", msg);
+ fflush (ctx->log.logfile);
} else {
fprintf (stderr, "%s\n", msg);
+ fflush (stderr);
}
#ifdef GF_LINUX_HOST_OS
/* We want only serious log in 'syslog', not our debug
and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
syslog ((level-1), "%s\n", msg);
#endif
}
-unlock:
- pthread_mutex_unlock (&logfile_mutex);
+ pthread_mutex_unlock (&ctx->log.logfile_mutex);
- if (msg) {
- GF_FREE (msg);
- }
+err:
+ GF_FREE (msg);
- if (str1)
- GF_FREE (str1);
+ GF_FREE (str1);
- if (str2)
- FREE (str2);
+ FREE (str2);
out:
return (0);
}
int
-gf_log_eh (void *data)
+_gf_log_eh (const char *function, const char *fmt, ...)
{
- int ret = -1;
+ int ret = -1;
+ va_list ap;
+ char *str1 = NULL;
+ char *str2 = NULL;
+ char *msg = NULL;
+ xlator_t *this = NULL;
- ret = eh_save_history (THIS->history, data);
+ this = THIS;
- return ret;
+ ret = gf_asprintf (&str1, "[%d] %s: ",
+ ((this->graph)?this->graph->id:0),
+ function);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ va_start (ap, fmt);
+
+ ret = vasprintf (&str2, fmt, ap);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ va_end (ap);
+
+ msg = GF_MALLOC (strlen (str1) + strlen (str2) + 1, gf_common_mt_char);
+ if (!msg) {
+ ret = -1;
+ goto out;
+ }
+
+ strcpy (msg, str1);
+ strcat (msg, str2);
+
+ ret = eh_save_history (this->history, msg);
+
+out:
+ GF_FREE (str1);
+
+ /* Use FREE instead of GF_FREE since str2 was allocated by vasprintf */
+ if (str2)
+ FREE (str2);
+
+ return ret;
}
int
@@ -622,8 +930,10 @@ gf_cmd_log_init (const char *filename)
{
int fd = -1;
xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
this = THIS;
+ ctx = this->ctx;
if (!filename){
gf_log (this->name, GF_LOG_CRITICAL, "gf_cmd_log_init: no "
@@ -631,19 +941,20 @@ gf_cmd_log_init (const char *filename)
return -1;
}
- cmd_log_filename = gf_strdup (filename);
- if (!cmd_log_filename) {
+ ctx->log.cmd_log_filename = gf_strdup (filename);
+ if (!ctx->log.cmd_log_filename) {
gf_log (this->name, GF_LOG_CRITICAL,
"gf_cmd_log_init: strdup error\n");
return -1;
}
/* close and reopen cmdlogfile for log rotate*/
- if (cmdlogfile) {
- fclose (cmdlogfile);
- cmdlogfile = NULL;
+ if (ctx->log.cmdlogfile) {
+ fclose (ctx->log.cmdlogfile);
+ ctx->log.cmdlogfile = NULL;
}
- fd = open (cmd_log_filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ fd = open (ctx->log.cmd_log_filename,
+ O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
gf_log (this->name, GF_LOG_CRITICAL,
"%s", strerror (errno));
@@ -651,11 +962,11 @@ gf_cmd_log_init (const char *filename)
}
close (fd);
- cmdlogfile = fopen (cmd_log_filename, "a");
- if (!cmdlogfile){
+ ctx->log.cmdlogfile = fopen (ctx->log.cmd_log_filename, "a");
+ if (!ctx->log.cmdlogfile){
gf_log (this->name, GF_LOG_CRITICAL,
"gf_cmd_log_init: failed to open logfile \"%s\" "
- "(%s)\n", cmd_log_filename, strerror (errno));
+ "(%s)\n", ctx->log.cmd_log_filename, strerror (errno));
return -1;
}
return 0;
@@ -664,17 +975,18 @@ gf_cmd_log_init (const char *filename)
int
gf_cmd_log (const char *domain, const char *fmt, ...)
{
- va_list ap;
- struct tm *tm = NULL;
- char timestr[256];
+ va_list ap;
+ char timestr[64];
struct timeval tv = {0,};
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- size_t len = 0;
- int ret = 0;
-
- if (!cmdlogfile)
+ char *str1 = NULL;
+ char *str2 = NULL;
+ char *msg = NULL;
+ size_t len = 0;
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = THIS->ctx;
+ if (!ctx->log.cmdlogfile)
return -1;
@@ -687,11 +999,8 @@ gf_cmd_log (const char *domain, const char *fmt, ...)
ret = gettimeofday (&tv, NULL);
if (ret == -1)
goto out;
-
- tm = localtime (&tv.tv_sec);
-
va_start (ap, fmt);
- strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm);
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
".%"GF_PRI_SUSECONDS, tv.tv_usec);
@@ -714,19 +1023,15 @@ gf_cmd_log (const char *domain, const char *fmt, ...)
strcpy (msg, str1);
strcpy (msg + len, str2);
- fprintf (cmdlogfile, "%s\n", msg);
- fflush (cmdlogfile);
+ fprintf (ctx->log.cmdlogfile, "%s\n", msg);
+ fflush (ctx->log.cmdlogfile);
out:
- if (msg) {
- GF_FREE (msg);
- }
+ GF_FREE (msg);
- if (str1)
- GF_FREE (str1);
+ GF_FREE (str1);
- if (str2)
- FREE (str2);
+ FREE (str2);
return (0);
}
diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/logging.h
index bbf0d9a38..cc806a767 100644
--- a/libglusterfs/src/logging.h
+++ b/libglusterfs/src/logging.h
@@ -19,6 +19,7 @@
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
+#include <pthread.h>
#ifdef GF_DARWIN_HOST_OS
#define GF_PRI_FSBLK "u"
@@ -34,6 +35,7 @@
#define GF_PRI_BLKSIZE PRId32
#define GF_PRI_SIZET "zu"
+
#if 0
/* Syslog definitions :-) */
#define LOG_EMERG 0 /* system is unusable */
@@ -59,25 +61,67 @@ typedef enum {
GF_LOG_TRACE, /* full trace of operation */
} gf_loglevel_t;
-extern gf_loglevel_t gf_log_loglevel;
-extern char gf_log_xl_log_set;
+#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define DEFAULT_LOG_LEVEL GF_LOG_INFO
+
+typedef struct gf_log_handle_ {
+ pthread_mutex_t logfile_mutex;
+ uint8_t logrotate;
+ gf_loglevel_t loglevel;
+ int gf_log_syslog;
+ gf_loglevel_t sys_log_level;
+ char gf_log_xl_log_set;
+ char *filename;
+ FILE *logfile;
+ FILE *gf_log_logfile;
+ char *cmd_log_filename;
+ FILE *cmdlogfile;
+#ifdef GF_USE_SYSLOG
+ int log_control_file_found;
+ char *ident;
+#endif /* GF_USE_SYSLOG */
+
+} gf_log_handle_t;
+
+void gf_log_globals_init (void *ctx);
+int gf_log_init (void *data, const char *filename, const char *ident);
+
+void gf_log_logrotate (int signum);
+
+void gf_log_cleanup (void);
+
+int _gf_log (const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ const char *fmt, ...)
+ __attribute__ ((__format__ (__printf__, 6, 7)));
+int _gf_log_callingfn (const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ const char *fmt, ...)
+ __attribute__ ((__format__ (__printf__, 6, 7)));
+
+int _gf_log_nomem (const char *domain, const char *file,
+ const char *function, int line, gf_loglevel_t level,
+ size_t size);
+
+int _gf_log_eh (const char *function, const char *fmt, ...);
+
+
#define FMT_WARN(fmt...) do { if (0) printf (fmt); } while (0)
#define gf_log(dom, levl, fmt...) do { \
FMT_WARN (fmt); \
- \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
_gf_log (dom, __FILE__, __FUNCTION__, __LINE__, \
levl, ##fmt); \
} while (0)
+#define gf_log_eh(fmt...) do { \
+ FMT_WARN (fmt); \
+ _gf_log_eh (__FUNCTION__, ##fmt); \
+ } while (0)
+
#define gf_log_callingfn(dom, levl, fmt...) do { \
FMT_WARN (fmt); \
- \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
_gf_log_callingfn (dom, __FILE__, __FUNCTION__, __LINE__, \
levl, ##fmt); \
} while (0)
@@ -85,8 +129,6 @@ extern char gf_log_xl_log_set;
/* No malloc or calloc should be called in this function */
#define gf_log_nomem(dom, levl, size) do { \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
_gf_log_nomem (dom, __FILE__, __FUNCTION__, __LINE__, \
levl, size); \
} while (0)
@@ -97,32 +139,6 @@ extern char gf_log_xl_log_set;
gf_log (args); \
}
-
-void gf_log_logrotate (int signum);
-int
-gf_log_eh (void *data);
-void gf_log_globals_init (void);
-int gf_log_init (const char *filename);
-void gf_log_cleanup (void);
-
-int _gf_log (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 6, 7)));
-int _gf_log_callingfn (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 6, 7)));
-
-int _gf_log_nomem (const char *domain, const char *file,
- const char *function, int line, gf_loglevel_t level,
- size_t size);
-
-int gf_log_from_client (const char *msg, char *identifier);
-
-void gf_log_lock (void);
-void gf_log_unlock (void);
-
void gf_log_disable_syslog (void);
void gf_log_enable_syslog (void);
gf_loglevel_t gf_log_get_loglevel (void);
@@ -130,6 +146,13 @@ void gf_log_set_loglevel (gf_loglevel_t level);
gf_loglevel_t gf_log_get_xl_loglevel (void *xl);
void gf_log_set_xl_loglevel (void *xl, gf_loglevel_t level);
+int gf_cmd_log (const char *domain, const char *fmt, ...)
+ __attribute__ ((__format__ (__printf__, 2, 3)));
+
+int gf_cmd_log_init (const char *filename);
+
+void set_sys_log_level (gf_loglevel_t level);
+
#define GF_DEBUG(xl, format, args...) \
gf_log ((xl)->name, GF_LOG_DEBUG, format, ##args)
#define GF_INFO(xl, format, args...) \
@@ -139,10 +162,4 @@ void gf_log_set_xl_loglevel (void *xl, gf_loglevel_t level);
#define GF_ERROR(xl, format, args...) \
gf_log ((xl)->name, GF_LOG_ERROR, format, ##args)
-int gf_cmd_log (const char *domain, const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 2, 3)));
-
-int gf_cmd_log_init (const char *filename);
-
-void set_sys_log_level (gf_loglevel_t level);
#endif /* __LOGGING_H__ */
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 3e8100c64..b901dd7a8 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -30,40 +30,16 @@
#define GLUSTERFS_ENV_MEM_ACCT_STR "GLUSTERFS_DISABLE_MEM_ACCT"
-static int gf_mem_acct_enable = 0;
-
-int
-gf_mem_acct_is_enabled ()
-{
- return gf_mem_acct_enable;
-}
-
void
-gf_mem_acct_enable_set ()
+gf_mem_acct_enable_set (void *data)
{
-#ifdef DEBUG
- gf_mem_acct_enable = 1;
- return;
-#endif
glusterfs_ctx_t *ctx = NULL;
- char *opt = NULL;
- long val = -1;
- gf_mem_acct_enable = 0;
+ ctx = data;
- ctx = glusterfs_ctx_get ();
+ GF_ASSERT (ctx);
- if (ctx->mem_accounting) {
- gf_mem_acct_enable = 1;
- return;
- }
-
- opt = getenv (GLUSTERFS_ENV_MEM_ACCT_STR);
- if (opt) {
- val = strtol (opt, NULL, 0);
- if (val)
- gf_mem_acct_enable = 1;
- }
+ ctx->mem_acct_enable = 1;
return;
}
@@ -80,17 +56,11 @@ gf_mem_set_acct_info (xlator_t *xl, char **alloc_ptr,
ptr = (char *) (*alloc_ptr);
- if (!xl) {
- GF_ASSERT (0);
- }
+ GF_ASSERT (xl != NULL);
- if (!(xl->mem_acct.rec)) {
- GF_ASSERT (0);
- }
+ GF_ASSERT (xl->mem_acct.rec != NULL);
- if (type > xl->mem_acct.num_types) {
- GF_ASSERT (0);
- }
+ GF_ASSERT (type <= xl->mem_acct.num_types);
LOCK(&xl->mem_acct.rec[type].lock);
{
@@ -130,7 +100,7 @@ __gf_calloc (size_t nmemb, size_t size, uint32_t type)
char *ptr = NULL;
xlator_t *xl = NULL;
- if (!gf_mem_acct_enable)
+ if (!THIS->ctx->mem_acct_enable)
return CALLOC (nmemb, size);
xl = THIS;
@@ -156,7 +126,7 @@ __gf_malloc (size_t size, uint32_t type)
char *ptr = NULL;
xlator_t *xl = NULL;
- if (!gf_mem_acct_enable)
+ if (!THIS->ctx->mem_acct_enable)
return MALLOC (size);
xl = THIS;
@@ -181,7 +151,7 @@ __gf_realloc (void *ptr, size_t size)
xlator_t *xl = NULL;
uint32_t type = 0;
- if (!gf_mem_acct_enable)
+ if (!THIS->ctx->mem_acct_enable)
return REALLOC (ptr, size);
tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
@@ -254,7 +224,7 @@ __gf_free (void *free_ptr)
uint32_t type = 0;
xlator_t *xl = NULL;
- if (!gf_mem_acct_enable) {
+ if (!THIS->ctx->mem_acct_enable) {
FREE (free_ptr);
return;
}
@@ -264,20 +234,16 @@ __gf_free (void *free_ptr)
ptr = (char *)free_ptr - 8 - 4;
- if (GF_MEM_HEADER_MAGIC != *(uint32_t *)ptr) {
- //Possible corruption, assert here
- GF_ASSERT (0);
- }
+ //Possible corruption, assert here
+ GF_ASSERT (GF_MEM_HEADER_MAGIC == *(uint32_t *)ptr);
*(uint32_t *)ptr = 0;
ptr = ptr - sizeof(xlator_t *);
memcpy (&xl, ptr, sizeof(xlator_t *));
- if (!xl) {
- //gf_free expects xl to be available
- GF_ASSERT (0);
- }
+ //gf_free expects xl to be available
+ GF_ASSERT (xl != NULL);
if (!xl->mem_acct.rec) {
ptr = (char *)free_ptr - GF_MEM_HEADER_SIZE;
@@ -290,11 +256,10 @@ __gf_free (void *free_ptr)
ptr = ptr - 4;
type = *(uint32_t *)ptr;
- if (GF_MEM_TRAILER_MAGIC != *(uint32_t *)
- ((char *)free_ptr + req_size)) {
- // This points to a memory overrun
- GF_ASSERT (0);
- }
+ // This points to a memory overrun
+ GF_ASSERT (GF_MEM_TRAILER_MAGIC ==
+ *(uint32_t *)((char *)free_ptr + req_size));
+
*(uint32_t *) ((char *)free_ptr + req_size) = 0;
LOCK (&xl->mem_acct.rec[type].lock);
@@ -365,7 +330,7 @@ mem_pool_new_fn (unsigned long sizeof_type,
mem_pool->pool_end = pool + (count * (padded_sizeof_type));
/* add this pool to the global list */
- ctx = glusterfs_ctx_get ();
+ ctx = THIS->ctx;
if (!ctx)
goto out;
@@ -453,8 +418,6 @@ mem_get (struct mem_pool *mem_pool)
mem_pool->max_stdalloc = mem_pool->curr_stdalloc;
ptr = GF_CALLOC (1, mem_pool->padded_sizeof_type,
gf_common_mt_mem_pool);
- gf_log_callingfn ("mem-pool", GF_LOG_DEBUG, "Mem pool is full. "
- "Callocing mem");
/* Memory coming from the heap need not be transformed from a
* chunkhead to a usable pointer since it is not coming from
diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h
index b3a25b25e..31f49f75c 100644
--- a/libglusterfs/src/mem-pool.h
+++ b/libglusterfs/src/mem-pool.h
@@ -110,6 +110,25 @@ void* __gf_default_realloc (void *oldptr, size_t size)
#define GF_FREE(free_ptr) __gf_free (free_ptr)
static inline
+char *gf_strndup (const char *src, size_t len)
+{
+        /*
+         * Duplicate at most @len characters of the string @src into a newly
+         * allocated, NUL-terminated buffer of len + 1 bytes.
+         *
+         * Returns the new buffer (release it with GF_FREE), or NULL when
+         * @src is NULL or the allocation fails.
+         */
+        char       *dup_str  = NULL;
+        const char *nul      = NULL;
+        size_t      copy_len = len;
+
+        if (!src) {
+                goto out;
+        }
+
+        /* Stop at the terminator when @src is shorter than @len, so that we
+         * never read past the end of the source string. */
+        nul = memchr (src, '\0', len);
+        if (nul)
+                copy_len = (size_t)(nul - src);
+
+        /* Allocation size is unchanged (len + 1); the zero fill provides
+         * the terminating NUL. */
+        dup_str = GF_CALLOC (1, len + 1, gf_common_mt_strdup);
+        if (!dup_str) {
+                goto out;
+        }
+
+        memcpy (dup_str, src, copy_len);
+out:
+        return dup_str;
+}
+
+static inline
char * gf_strdup (const char *src)
{
@@ -128,6 +147,21 @@ char * gf_strdup (const char *src)
return dup_str;
}
+/*
+ * Duplicate @size bytes starting at @src into a newly allocated buffer.
+ *
+ * Returns the copy (release it with GF_FREE), or NULL when @src is NULL or
+ * the allocation fails.
+ */
+static inline void *
+gf_memdup (const void *src, size_t size)
+{
+        void *dup_mem = NULL;
+
+        /* memcpy() from a NULL source is undefined; mirror the NULL guard
+         * used by gf_strndup() */
+        if (!src)
+                goto out;
+
+        dup_mem = GF_CALLOC(1, size, gf_common_mt_strdup);
+        if (!dup_mem)
+                goto out;
+
+        memcpy (dup_mem, src, size);
+
+out:
+        return dup_mem;
+}
+
struct mem_pool {
struct list_head list;
int hot_count;
@@ -157,7 +191,6 @@ void *mem_get0 (struct mem_pool *pool);
void mem_pool_destroy (struct mem_pool *pool);
-int gf_mem_acct_is_enabled ();
-void gf_mem_acct_enable_set ();
+void gf_mem_acct_enable_set (void *ctx);
#endif /* _MEM_POOL_H */
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
index 12379bf31..666bd120a 100644
--- a/libglusterfs/src/mem-types.h
+++ b/libglusterfs/src/mem-types.h
@@ -102,6 +102,24 @@ enum gf_common_mem_types_ {
gf_common_mt_buffer_t = 86,
gf_common_mt_circular_buffer_t = 87,
gf_common_mt_eh_t = 88,
- gf_common_mt_end = 89
+ gf_common_mt_store_handle_t = 89,
+ gf_common_mt_store_iter_t = 90,
+ gf_common_mt_drc_client_t = 91,
+ gf_common_mt_drc_globals_t = 92,
+ gf_common_mt_drc_rbtree_node_t = 93,
+ gf_common_mt_iov_base_t = 94,
+ gf_common_mt_groups_t = 95,
+ gf_common_mt_cliententry_t = 96,
+ gf_common_mt_clienttable_t = 97,
+ gf_common_mt_client_t = 98,
+ gf_common_mt_client_ctx = 99,
+ gf_common_mt_lock_table = 100,
+ gf_common_mt_locker = 101,
+ gf_common_mt_auxgids = 102,
+ gf_common_mt_syncopctx = 103,
+ gf_common_mt_uuid_t = 104,
+ gf_common_mt_mgmt_v3_lock_obj_t = 105,
+ gf_common_mt_txn_opinfo_obj_t = 106,
+ gf_common_mt_end = 107
};
#endif
diff --git a/libglusterfs/src/options.c b/libglusterfs/src/options.c
index 0cec59fdb..842b6413a 100644
--- a/libglusterfs/src/options.c
+++ b/libglusterfs/src/options.c
@@ -53,7 +53,6 @@ out:
return ret;
}
-
static int
xlator_option_validate_int (xlator_t *xl, const char *key, const char *value,
volume_option_t *opt, char **op_errstr)
@@ -71,7 +70,8 @@ xlator_option_validate_int (xlator_t *xl, const char *key, const char *value,
goto out;
}
- if ((opt->min == 0) && (opt->max == 0)) {
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
gf_log (xl->name, GF_LOG_TRACE,
"no range check required for 'option %s %s'",
key, value);
@@ -79,10 +79,28 @@ xlator_option_validate_int (xlator_t *xl, const char *key, const char *value,
goto out;
}
- if ((inputll < opt->min) || (inputll > opt->max)) {
+ if ((opt->validate == GF_OPT_VALIDATE_MIN)) {
+ if (inputll < opt->min) {
+ snprintf (errstr, 256,
+ "'%lld' in 'option %s %s' is smaller than "
+ "minimum value '%.0f'", inputll, key,
+ value, opt->min);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((opt->validate == GF_OPT_VALIDATE_MAX)) {
+ if ((inputll > opt->max)) {
+ snprintf (errstr, 256,
+ "'%lld' in 'option %s %s' is greater than "
+ "maximum value '%.0f'", inputll, key,
+ value, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((inputll < opt->min) || (inputll > opt->max)) {
snprintf (errstr, 256,
"'%lld' in 'option %s %s' is out of range "
- "[%"PRId64" - %"PRId64"]",
+ "[%.0f - %.0f]",
inputll, key, value, opt->min, opt->max);
gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
goto out;
@@ -101,7 +119,7 @@ xlator_option_validate_sizet (xlator_t *xl, const char *key, const char *value,
volume_option_t *opt, char **op_errstr)
{
uint64_t size = 0;
- int ret = -1;
+ int ret = 0;
char errstr[256];
/* Check the range */
@@ -110,6 +128,7 @@ xlator_option_validate_sizet (xlator_t *xl, const char *key, const char *value,
"invalid number format \"%s\" in option \"%s\"",
value, key);
gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ ret = -1;
goto out;
}
@@ -117,30 +136,26 @@ xlator_option_validate_sizet (xlator_t *xl, const char *key, const char *value,
gf_log (xl->name, GF_LOG_TRACE,
"no range check required for 'option %s %s'",
key, value);
- ret = 0;
goto out;
}
if ((size < opt->min) || (size > opt->max)) {
- if (strncmp (key, "cache-size", 10) == 0) {
+ if ((strncmp (key, "cache-size", 10) == 0) &&
+ (size > opt->max)) {
snprintf (errstr, 256, "Cache size %"PRId64" is out of "
- "range [%"PRId64" - %"PRId64"]",
+ "range [%.0f - %.0f]",
size, opt->min, opt->max);
- //*op_errstr = gf_strdup (errstr);
gf_log (xl->name, GF_LOG_WARNING, "%s", errstr);
- ret = 0;
- goto out;
} else {
snprintf (errstr, 256,
"'%"PRId64"' in 'option %s %s' "
- "is out of range [%"PRId64" - %"PRId64"]",
+ "is out of range [%.0f - %.0f]",
size, key, value, opt->min, opt->max);
gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ ret = -1;
}
}
- ret = 0;
out:
if (ret && op_errstr)
*op_errstr = gf_strdup (errstr);
@@ -213,6 +228,46 @@ out:
return ret;
}
+/*
+ * Format the "value is not valid" error for a string option into @errstr
+ * (at most @len bytes), listing the values accepted by @opt.
+ *
+ * The accepted values are joined as "a, b, c." in a fixed 4096-byte scratch
+ * buffer; if they do not fit, the list is truncated rather than overflowing.
+ */
+void
+set_error_str (char *errstr, size_t len, volume_option_t *opt, const char *key,
+               const char *value)
+{
+        int     i = 0;
+        int     n = 0;
+        size_t  used = 0;
+        char    given_array[4096] = {0,};
+
+        for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i]; i++) {
+                /* ", " between entries, "." after the last one */
+                const char *sep = ", ";
+
+                if (((i + 1) == ZR_OPTION_MAX_ARRAY_SIZE) ||
+                    !opt->value[i + 1])
+                        sep = ".";
+
+                n = snprintf (given_array + used, sizeof (given_array) - used,
+                              "%s%s", opt->value[i], sep);
+                /* stop on error or truncation; snprintf has already
+                 * NUL-terminated whatever fit */
+                if ((n < 0) || ((size_t)n >= sizeof (given_array) - used))
+                        break;
+                used += (size_t)n;
+        }
+        snprintf (errstr, len, "option %s %s: '%s' is not valid "
+                  "(possible options are %s)", key, value, value, given_array);
+        return;
+}
+
+/*
+ * Report whether @value consists solely of space (' ') characters.
+ *
+ * Returns -1 when @value is NULL, 1 when every character is a space
+ * (including the empty string), and 0 as soon as any other character,
+ * tabs and newlines included, is found.
+ */
+int
+is_all_whitespaces (const char *value)
+{
+        const char *p = NULL;
+
+        if (value == NULL)
+                return -1;
+
+        /* walk to the terminator; no separate strlen() pass and no
+         * signed/unsigned index comparison */
+        for (p = value; *p != '\0'; p++) {
+                if (*p != ' ')
+                        return 0;
+        }
+
+        return 1;
+}
static int
xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
@@ -220,8 +275,7 @@ xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
{
int ret = -1;
int i = 0;
- char errstr[256];
- char given_array[4096] = {0,};
+ char errstr[4096] = {0,};
/* Check if the '*str' is valid */
if (GF_OPTION_LIST_EMPTY(opt)) {
@@ -229,6 +283,9 @@ xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
goto out;
}
+ if (is_all_whitespaces (value) == 1)
+ goto out;
+
for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i]; i++) {
#ifdef GF_DARWIN_HOST_OS
if (fnmatch (opt->value[i], value, 0) == 0) {
@@ -243,8 +300,8 @@ xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
#endif
}
- if (((i < ZR_OPTION_MAX_ARRAY_SIZE) && (!opt->value[i])) ||
- (i == ZR_OPTION_MAX_ARRAY_SIZE)) {
+ if ((i == ZR_OPTION_MAX_ARRAY_SIZE) || (!opt->value[i]))
+ goto out;
/* enter here only if
* 1. reached end of opt->value array and haven't
* validated input
@@ -254,26 +311,15 @@ xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
* matched all possible input values.
*/
- for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i];) {
- strcat (given_array, opt->value[i]);
- if (((++i) < ZR_OPTION_MAX_ARRAY_SIZE) &&
- (opt->value[i]))
- strcat (given_array, ", ");
- else
- strcat (given_array, ".");
- }
- snprintf (errstr, 256,
- "option %s %s: '%s' is not valid "
- "(possible options are %s)",
- key, value, value, given_array);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
ret = 0;
+
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
+ if (ret) {
+ set_error_str (errstr, sizeof (errstr), opt, key, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ if (op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ }
return ret;
}
@@ -282,10 +328,9 @@ static int
xlator_option_validate_percent (xlator_t *xl, const char *key, const char *value,
volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- uint32_t percent = 0;
-
+ double percent = 0;
+ int ret = -1;
+ char errstr[256];
/* Check if the value is valid percentage */
if (gf_string2percent (value, &percent) != 0) {
@@ -296,9 +341,9 @@ xlator_option_validate_percent (xlator_t *xl, const char *key, const char *value
goto out;
}
- if ((percent < 0) || (percent > 100)) {
+ if ((percent < 0.0) || (percent > 100.0)) {
snprintf (errstr, 256,
- "'%d' in 'option %s %s' is out of range [0 - 100]",
+ "'%lf' in 'option %s %s' is out of range [0 - 100]",
percent, key, value);
gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
goto out;
@@ -338,8 +383,7 @@ xlator_option_validate_percent_or_sizet (xlator_t *xl, const char *key,
if ((size < opt->min) || (size > opt->max)) {
snprintf (errstr, 256,
"'%"PRId64"' in 'option %s %s'"
- " is out of range [%"PRId64" -"
- " %"PRId64"]",
+ " is out of range [%.0f - %.0f]",
size, key, value, opt->min, opt->max);
gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
goto out;
@@ -393,7 +437,7 @@ xlator_option_validate_time (xlator_t *xl, const char *key, const char *value,
if ((input_time < opt->min) || (input_time > opt->max)) {
snprintf (errstr, 256,
"'%"PRIu32"' in 'option %s %s' is "
- "out of range [%"PRId64" - %"PRId64"]",
+ "out of range [%.0f - %.0f]",
input_time, key, value,
opt->min, opt->max);
gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
@@ -412,28 +456,21 @@ static int
xlator_option_validate_double (xlator_t *xl, const char *key, const char *value,
volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- double val = 0.0;
-
- /* Check if the value is valid double */
- if (gf_string2double (value, &val) != 0) {
- snprintf (errstr, 256,
- "invalid double \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
+ double input = 0.0;
+ int ret = -1;
+ char errstr[256];
- if (val < 0.0) {
+ /* Check the range */
+ if (gf_string2double (value, &input) != 0) {
snprintf (errstr, 256,
- "invalid double \"%s\" in \"option %s\"",
+ "invalid number format \"%s\" in option \"%s\"",
value, key);
gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
goto out;
}
- if ((opt->min == 0) && (opt->max == 0)) {
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
gf_log (xl->name, GF_LOG_TRACE,
"no range check required for 'option %s %s'",
key, value);
@@ -441,6 +478,33 @@ xlator_option_validate_double (xlator_t *xl, const char *key, const char *value,
goto out;
}
+ if ((opt->validate == GF_OPT_VALIDATE_MIN)) {
+ if (input < opt->min) {
+ snprintf (errstr, 256,
+ "'%f' in 'option %s %s' is smaller than "
+ "minimum value '%f'", input, key,
+ value, opt->min);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((opt->validate == GF_OPT_VALIDATE_MAX)) {
+ if ((input > opt->max)) {
+ snprintf (errstr, 256,
+ "'%f' in 'option %s %s' is greater than "
+ "maximum value '%f'", input, key,
+ value, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+ } else if ((input < opt->min) || (input > opt->max)) {
+ snprintf (errstr, 256,
+ "'%f' in 'option %s %s' is out of range "
+ "[%f - %f]",
+ input, key, value, opt->min, opt->max);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ goto out;
+ }
+
ret = 0;
out:
if (ret && op_errstr)
@@ -480,34 +544,32 @@ xlator_option_validate_addr_list (xlator_t *xl, const char *key,
char *dup_val = NULL;
char *addr_tok = NULL;
char *save_ptr = NULL;
- char errstr[256];
+ char errstr[4096] = {0,};
dup_val = gf_strdup (value);
- if (!dup_val) {
- ret = -1;
- snprintf (errstr, 256, "internal error, out of memory.");
+ if (!dup_val)
goto out;
- }
addr_tok = strtok_r (dup_val, ",", &save_ptr);
+ if (addr_tok == NULL)
+ goto out;
while (addr_tok) {
- if (!valid_internet_address (addr_tok, _gf_true)) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid "
- "internet-address-list",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- ret = -1;
+ if (!valid_internet_address (addr_tok, _gf_true))
goto out;
- }
+
addr_tok = strtok_r (NULL, ",", &save_ptr);
}
ret = 0;
- out:
- if (op_errstr && ret)
- *op_errstr = gf_strdup (errstr);
- if (dup_val)
- GF_FREE (dup_val);
+
+out:
+ if (ret) {
+ snprintf (errstr, sizeof (errstr), "option %s %s: '%s' is not "
+ "a valid internet-address-list", key, value, value);
+ gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ if (op_errstr)
+ *op_errstr = gf_strdup (errstr);
+ }
+ GF_FREE (dup_val);
return ret;
}
@@ -563,6 +625,10 @@ validate_list_elements (const char *string, volume_option_t *opt,
goto out;
str_ptr = strtok_r (dup_string, ",", &str_sav);
+ if (str_ptr == NULL) {
+ ret = -1;
+ goto out;
+ }
while (str_ptr) {
key = strtok_r (str_ptr, ":", &substr_sav);
@@ -588,9 +654,9 @@ validate_list_elements (const char *string, volume_option_t *opt,
str_ptr = strtok_r (NULL, ",", &str_sav);
substr_sav = NULL;
}
+
out:
- if (dup_string)
- GF_FREE (dup_string);
+ GF_FREE (dup_string);
gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
return ret;
}
@@ -695,7 +761,7 @@ out:
}
-static volume_option_t *
+volume_option_t *
xlator_volume_option_get_list (volume_opt_list_t *vol_list, const char *key)
{
volume_option_t *opt = NULL;
@@ -744,7 +810,7 @@ xlator_volume_option_get (xlator_t *xl, const char *key)
}
-static void
+static int
xl_opt_validate (dict_t *dict, char *key, data_t *value, void *data)
{
xlator_t *xl = NULL;
@@ -765,7 +831,7 @@ xl_opt_validate (dict_t *dict, char *key, data_t *value, void *data)
opt = xlator_volume_option_get_list (vol_opt, key);
if (!opt)
- return;
+ return 0;
ret = xlator_option_validate (xl, key, value->data, opt, &errstr);
if (ret)
@@ -783,7 +849,7 @@ xl_opt_validate (dict_t *dict, char *key, data_t *value, void *data)
dict_set (dict, opt->key[0], value);
dict_del (dict, key);
}
- return;
+ return 0;
}
@@ -988,11 +1054,8 @@ out:
static int
-not_null (char *in, char **out)
+pass (char *in, char **out)
{
- if (!in || !out)
- return -1;
-
*out = in;
return 0;
}
@@ -1013,46 +1076,52 @@ xl_by_name (char *in, xlator_t **out)
static int
-pc_or_size (char *in, uint64_t *out)
+pc_or_size (char *in, double *out)
{
- uint32_t pc = 0;
+ double pc = 0;
int ret = 0;
+ uint64_t size = 0;
if (gf_string2percent (in, &pc) == 0) {
- if (pc > 100) {
- ret = gf_string2bytesize (in, out);
+ if (pc > 100.0) {
+ ret = gf_string2bytesize (in, &size);
+ if (!ret)
+ *out = size;
} else {
*out = pc;
}
} else {
- ret = gf_string2bytesize (in, out);
+ ret = gf_string2bytesize (in, &size);
+ if (!ret)
+ *out = size;
}
return ret;
}
-
-DEFINE_INIT_OPT(char *, str, not_null);
+DEFINE_INIT_OPT(char *, str, pass);
DEFINE_INIT_OPT(uint64_t, uint64, gf_string2uint64);
DEFINE_INIT_OPT(int64_t, int64, gf_string2int64);
DEFINE_INIT_OPT(uint32_t, uint32, gf_string2uint32);
DEFINE_INIT_OPT(int32_t, int32, gf_string2int32);
DEFINE_INIT_OPT(uint64_t, size, gf_string2bytesize);
-DEFINE_INIT_OPT(uint32_t, percent, gf_string2percent);
-DEFINE_INIT_OPT(uint64_t, percent_or_size, pc_or_size);
+DEFINE_INIT_OPT(double, percent, gf_string2percent);
+DEFINE_INIT_OPT(double, percent_or_size, pc_or_size);
DEFINE_INIT_OPT(gf_boolean_t, bool, gf_string2boolean);
DEFINE_INIT_OPT(xlator_t *, xlator, xl_by_name);
-DEFINE_INIT_OPT(char *, path, not_null);
+DEFINE_INIT_OPT(char *, path, pass);
+DEFINE_INIT_OPT(double, double, gf_string2double);
-DEFINE_RECONF_OPT(char *, str, not_null);
+DEFINE_RECONF_OPT(char *, str, pass);
DEFINE_RECONF_OPT(uint64_t, uint64, gf_string2uint64);
DEFINE_RECONF_OPT(int64_t, int64, gf_string2int64);
DEFINE_RECONF_OPT(uint32_t, uint32, gf_string2uint32);
DEFINE_RECONF_OPT(int32_t, int32, gf_string2int32);
DEFINE_RECONF_OPT(uint64_t, size, gf_string2bytesize);
-DEFINE_RECONF_OPT(uint32_t, percent, gf_string2percent);
-DEFINE_RECONF_OPT(uint64_t, percent_or_size, pc_or_size);
+DEFINE_RECONF_OPT(double, percent, gf_string2percent);
+DEFINE_RECONF_OPT(double, percent_or_size, pc_or_size);
DEFINE_RECONF_OPT(gf_boolean_t, bool, gf_string2boolean);
DEFINE_RECONF_OPT(xlator_t *, xlator, xl_by_name);
-DEFINE_RECONF_OPT(char *, path, not_null);
+DEFINE_RECONF_OPT(char *, path, pass);
+DEFINE_RECONF_OPT(double, double, gf_string2double);
diff --git a/libglusterfs/src/options.h b/libglusterfs/src/options.h
index 01d2a9d5f..e2a25baa9 100644
--- a/libglusterfs/src/options.h
+++ b/libglusterfs/src/options.h
@@ -41,22 +41,32 @@ typedef enum {
GF_OPTION_TYPE_MAX,
} volume_option_type_t;
+typedef enum {
+ GF_OPT_VALIDATE_BOTH = 0,
+ GF_OPT_VALIDATE_MIN,
+ GF_OPT_VALIDATE_MAX,
+} opt_validate_type_t;
#define ZR_VOLUME_MAX_NUM_KEY 4
#define ZR_OPTION_MAX_ARRAY_SIZE 64
/* Each translator should define this structure */
typedef struct volume_options {
- char *key[ZR_VOLUME_MAX_NUM_KEY];
+ char *key[ZR_VOLUME_MAX_NUM_KEY];
/* different key, same meaning */
- volume_option_type_t type;
- int64_t min; /* 0 means no range */
- int64_t max; /* 0 means no range */
- char *value[ZR_OPTION_MAX_ARRAY_SIZE];
+ volume_option_type_t type;
+ double min; /* 0 means no range */
+ double max; /* 0 means no range */
+ char *value[ZR_OPTION_MAX_ARRAY_SIZE];
/* If specified, will check for one of
the value from this array */
- char *default_value;
- char *description; /* about the key */
+ char *default_value;
+ char *description; /* about the key */
+ /* Required for int options where only the min value
+ * is given and is 0. This will cause validation not to
+ * happen
+ */
+ opt_validate_type_t validate;
} volume_option_t;
@@ -83,6 +93,9 @@ int xlator_options_validate (xlator_t *xl, dict_t *options, char **errstr);
volume_option_t *
xlator_volume_option_get (xlator_t *xl, const char *key);
+volume_option_t *
+xlator_volume_option_get_list (volume_opt_list_t *vol_list, const char *key);
+
#define DECLARE_INIT_OPT(type_t, type) \
int \
@@ -95,11 +108,12 @@ DECLARE_INIT_OPT(int64_t, int64);
DECLARE_INIT_OPT(uint32_t, uint32);
DECLARE_INIT_OPT(int32_t, int32);
DECLARE_INIT_OPT(uint64_t, size);
-DECLARE_INIT_OPT(uint32_t, percent);
-DECLARE_INIT_OPT(uint64_t, percent_or_size);
+DECLARE_INIT_OPT(double, percent);
+DECLARE_INIT_OPT(double, percent_or_size);
DECLARE_INIT_OPT(gf_boolean_t, bool);
DECLARE_INIT_OPT(xlator_t *, xlator);
DECLARE_INIT_OPT(char *, path);
+DECLARE_INIT_OPT(double, double);
#define DEFINE_INIT_OPT(type_t, type, conv) \
@@ -131,6 +145,7 @@ xlator_option_init_##type (xlator_t *this, dict_t *options, char *key, \
if (!value) { \
gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
key); \
+ *val_p = (type_t)0; \
return 0; \
} \
if (value == def_value) { \
@@ -173,11 +188,12 @@ DECLARE_RECONF_OPT(int64_t, int64);
DECLARE_RECONF_OPT(uint32_t, uint32);
DECLARE_RECONF_OPT(int32_t, int32);
DECLARE_RECONF_OPT(uint64_t, size);
-DECLARE_RECONF_OPT(uint32_t, percent);
-DECLARE_RECONF_OPT(uint64_t, percent_or_size);
+DECLARE_RECONF_OPT(double, percent);
+DECLARE_RECONF_OPT(double, percent_or_size);
DECLARE_RECONF_OPT(gf_boolean_t, bool);
DECLARE_RECONF_OPT(xlator_t *, xlator);
DECLARE_RECONF_OPT(char *, path);
+DECLARE_RECONF_OPT(double, double);
#define DEFINE_RECONF_OPT(type_t, type, conv) \
@@ -209,6 +225,7 @@ xlator_option_reconf_##type (xlator_t *this, dict_t *options, char *key, \
if (!value) { \
gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
key); \
+ *val_p = (type_t)0; \
return 0; \
} \
if (value == def_value) { \
diff --git a/libglusterfs/src/rbthash.c b/libglusterfs/src/rbthash.c
index 4f04fed93..0d7b9e521 100644
--- a/libglusterfs/src/rbthash.c
+++ b/libglusterfs/src/rbthash.c
@@ -208,8 +208,7 @@ rbthash_deinit_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
if (!entry)
return;
- if (entry->key)
- GF_FREE (entry->key);
+ GF_FREE (entry->key);
if (tbl) {
if ((entry->data) && (tbl->dfunc))
@@ -228,7 +227,7 @@ rbthash_deinit_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
}
-inline struct rbthash_bucket *
+static inline struct rbthash_bucket *
rbthash_entry_bucket (rbthash_table_t *tbl, rbthash_entry_t * entry)
{
int nbucket = 0;
@@ -302,7 +301,7 @@ err:
return ret;
}
-inline struct rbthash_bucket *
+static inline struct rbthash_bucket *
rbthash_key_bucket (rbthash_table_t *tbl, void *key, int keylen)
{
uint32_t keyhash = 0;
diff --git a/libglusterfs/src/run.c b/libglusterfs/src/run.c
index 34d75df69..4fd2a3a0d 100644
--- a/libglusterfs/src/run.c
+++ b/libglusterfs/src/run.c
@@ -67,7 +67,10 @@ runner_chio (runner_t *runner, int fd)
{
GF_ASSERT (fd > 0 && fd < 3);
- return runner->chio[fd];
+ if ((fd > 0) && (fd < 3))
+ return runner->chio[fd];
+
+ return NULL;
}
static void
@@ -184,7 +187,7 @@ runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
if (len > 0)
buf[len - 1] = '\0';
- gf_log (dom, lvl, "%s: %s", msg, buf);
+ gf_log_callingfn (dom, lvl, "%s: %s", msg, buf);
GF_FREE (buf);
}
@@ -194,7 +197,8 @@ runner_redir (runner_t *runner, int fd, int tgt_fd)
{
GF_ASSERT (fd > 0 && fd < 3);
- runner->chfd[fd] = (tgt_fd >= 0) ? tgt_fd : -2;
+ if ((fd > 0) && (fd < 3))
+ runner->chfd[fd] = (tgt_fd >= 0) ? tgt_fd : -2;
}
int
@@ -388,6 +392,25 @@ runner_run (runner_t *runner)
return runner_run_generic (runner, runner_end);
}
+
+/*
+ * Run the command from an intermediate forked child.
+ *
+ * The child calls setsid() and then exits with the return value of
+ * runner_start(); the parent records the child's pid in runner->chpid and
+ * returns whatever runner_end() returns.
+ */
+int
+runner_run_nowait (runner_t *runner)
+{
+ int pid;
+
+ pid = fork ();
+
+ if (!pid) {
+ /* child: start a new session, then exit with runner_start()'s result */
+ setsid ();
+ _exit (runner_start (runner));
+ }
+
+ /* NOTE(review): when fork() fails (pid < 0) chpid is left untouched and
+ * runner_end() is still called -- confirm runner_end() copes with that */
+ if (pid > 0)
+ runner->chpid = pid;
+ return runner_end (runner);
+}
+
+
int
runner_run_reuse (runner_t *runner)
{
@@ -467,7 +490,7 @@ main (int argc, char **argv)
printf ("%d %d [%s]\n", ret, errno, strerror (errno));
TBANNER ("output redirection");
- fd = open ("/tmp/foof", O_WRONLY|O_CREAT|O_TRUNC, 0600);
+ /* mkstemp() rewrites its template in place and requires it to end in
+ * "XXXXXX", so it must be given a writable array, not a string literal */
+ char foof[] = "/tmp/foofXXXXXX";
+ fd = mkstemp (foof);
assert (fd != -1);
runinit (&runner);
runner_add_args (&runner, "echo", "foo", NULL);
diff --git a/libglusterfs/src/run.h b/libglusterfs/src/run.h
index 508c59c13..d7554ef6d 100644
--- a/libglusterfs/src/run.h
+++ b/libglusterfs/src/run.h
@@ -166,6 +166,12 @@ int runner_end_reuse (runner_t *runner);
int runner_run (runner_t *runner);
/**
+ * variant for runner_run() which does not wait for acknowledgement
+ * from child, and always assumes it succeeds.
+ */
+int runner_run_nowait (runner_t *runner);
+
+/**
* variant of runner_run() which does not free internal data
* so that the runner instance can be run again.
*
diff --git a/libglusterfs/src/scheduler.c b/libglusterfs/src/scheduler.c
deleted file mode 100644
index 9817f3e26..000000000
--- a/libglusterfs/src/scheduler.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <netdb.h>
-#include "xlator.h"
-#include "scheduler.h"
-#include "list.h"
-
-struct sched_ops *
-get_scheduler (xlator_t *xl, const char *name)
-{
- struct sched_ops *tmp_sched = NULL;
- volume_opt_list_t *vol_opt = NULL;
- char *sched_file = NULL;
- void *handle = NULL;
- int ret = 0;
-
- if (name == NULL) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "'name' not specified, EINVAL");
- return NULL;
- }
-
- ret = gf_asprintf (&sched_file, "%s/%s.so", SCHEDULERDIR, name);
- if (-1 == ret) {
- gf_log ("scheduler", GF_LOG_ERROR, "asprintf failed");
- return NULL;
- }
-
- gf_log ("scheduler", GF_LOG_DEBUG,
- "attempt to load file %s.so", name);
-
- handle = dlopen (sched_file, RTLD_LAZY);
- if (!handle) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlopen(%s): %s", sched_file, dlerror ());
- GF_FREE(sched_file);
- return NULL;
- }
-
- tmp_sched = dlsym (handle, "sched");
- if (!tmp_sched) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlsym(sched) on %s", dlerror ());
- GF_FREE(sched_file);
- return NULL;
- }
-
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("scheduler", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- list_add_tail (&vol_opt->list, &xl->volume_options);
- if (validate_xlator_volume_options (xl, vol_opt->given_opt)
- == -1) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "volume option validation failed");
- GF_FREE(sched_file);
- return NULL;
- }
- }
- GF_FREE(sched_file);
- GF_FREE (vol_opt);
-
- return tmp_sched;
-}
diff --git a/libglusterfs/src/scheduler.h b/libglusterfs/src/scheduler.h
deleted file mode 100644
index 2f1e12205..000000000
--- a/libglusterfs/src/scheduler.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _SCHEDULER_H
-#define _SCHEDULER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-struct sched_ops {
- int32_t (*init) (xlator_t *this);
- void (*fini) (xlator_t *this);
- void (*update) (xlator_t *this);
- xlator_t *(*schedule) (xlator_t *this, const void *path);
- void (*notify) (xlator_t *xl, int32_t event, void *data);
- int32_t (*mem_acct_init) (xlator_t *this);
-};
-
-extern struct sched_ops *get_scheduler (xlator_t *xl, const char *name);
-
-#endif /* _SCHEDULER_H */
diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c
index f922be83c..37b338f51 100644
--- a/libglusterfs/src/stack.c
+++ b/libglusterfs/src/stack.c
@@ -26,6 +26,44 @@ int call_frames_count (call_frame_t *call_frame)
return count;
}
+/*
+ * Allocate a new call stack from pool->stack_mem_pool and return its
+ * embedded root frame, bound to translator @xl.
+ *
+ * The stack takes its context from xl->ctx; when that context has
+ * measure_latency set, the creation time is stored in stack->tv and copied
+ * to the root frame's 'begin'. The stack is linked onto pool->all_frames
+ * and pool->cnt is incremented under pool->lock.
+ *
+ * Returns NULL when @xl or @pool is NULL or the allocation fails.
+ */
+call_frame_t *
+create_frame (xlator_t *xl, call_pool_t *pool)
+{
+        call_stack_t    *stack = NULL;
+
+        if (!xl || !pool) {
+                return NULL;
+        }
+
+        stack = mem_get0 (pool->stack_mem_pool);
+        if (!stack)
+                return NULL;
+
+        stack->pool = pool;
+        stack->frames.root = stack;
+        stack->frames.this = xl;
+        stack->ctx = xl->ctx;
+
+        if (stack->ctx->measure_latency) {
+                if (gettimeofday (&stack->tv, NULL) == -1)
+                        gf_log ("stack", GF_LOG_ERROR, "gettimeofday () failed."
+                                " (%s)", strerror (errno));
+                memcpy (&stack->frames.begin, &stack->tv, sizeof (stack->tv));
+        }
+
+        /* Initialise the locks before the stack becomes reachable through
+         * pool->all_frames: anything walking that list under pool->lock
+         * may try to take the frame lock straight away. */
+        LOCK_INIT (&stack->frames.lock);
+        LOCK_INIT (&stack->stack_lock);
+
+        LOCK (&pool->lock);
+        {
+                list_add (&stack->all_frames, &pool->all_frames);
+                pool->cnt++;
+        }
+        UNLOCK (&pool->lock);
+
+        return &stack->frames;
+}
+
void
gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
{
@@ -34,6 +72,7 @@ gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
va_list ap;
call_frame_t my_frame;
int ret = -1;
+ char timestr[256] = {0,};
if (!call_frame)
return;
@@ -47,15 +86,21 @@ gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
va_end(ap);
ret = TRY_LOCK(&call_frame->lock);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call frame"
- " errno: %s", strerror (errno));
- return;
- }
+ if (ret)
+ goto out;
memcpy(&my_frame, call_frame, sizeof(my_frame));
UNLOCK(&call_frame->lock);
+ if (my_frame.this->ctx->measure_latency) {
+ gf_time_fmt (timestr, sizeof timestr, my_frame.begin.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, my_frame.begin.tv_usec);
+ gf_proc_dump_write("frame-creation-time", "%s", timestr);
+ }
+
gf_proc_dump_write("ref_count", "%d", my_frame.ref_count);
gf_proc_dump_write("translator", "%s", my_frame.this->name);
gf_proc_dump_write("complete", "%d", my_frame.complete);
@@ -73,6 +118,14 @@ gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
if (my_frame.unwind_to)
gf_proc_dump_write("unwind_to", "%s", my_frame.unwind_to);
+
+ ret = 0;
+out:
+ if (ret) {
+ /* my_frame is only filled in once the lock has been taken, and %p
+ * needs a pointer: report the address of the frame we failed to lock */
+ gf_proc_dump_write("Unable to dump the frame information",
+ "(Lock acquisition failed) %p", call_frame);
+ return;
+ }
}
@@ -83,6 +136,7 @@ gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
va_list ap;
call_frame_t *trav;
int32_t cnt, i;
+ char timestr[256] = {0,};
if (!call_stack)
return;
@@ -96,6 +150,15 @@ gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
va_end(ap);
+ if (call_stack->ctx->measure_latency) {
+ gf_time_fmt (timestr, sizeof timestr, call_stack->tv.tv_sec,
+ gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, call_stack->tv.tv_usec);
+ gf_proc_dump_write("callstack-creation-time", "%s", timestr);
+ }
+
gf_proc_dump_write("uid", "%d", call_stack->uid);
gf_proc_dump_write("gid", "%d", call_stack->gid);
gf_proc_dump_write("pid", "%d", call_stack->pid);
@@ -103,9 +166,10 @@ gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
gf_proc_dump_write("lk-owner", "%s", lkowner_utoa (&call_stack->lk_owner));
if (call_stack->type == GF_OP_TYPE_FOP)
- gf_proc_dump_write("op", "%s", gf_fop_list[call_stack->op]);
- else if (call_stack->type == GF_OP_TYPE_MGMT)
- gf_proc_dump_write("op", "%s", gf_mgmt_list[call_stack->op]);
+ gf_proc_dump_write("op", "%s",
+ (char *)gf_fop_list[call_stack->op]);
+ else
+ gf_proc_dump_write("op", "stack");
gf_proc_dump_write("type", "%d", call_stack->type);
gf_proc_dump_write("cnt", "%d", cnt);
@@ -128,19 +192,18 @@ gf_proc_dump_pending_frames (call_pool_t *call_pool)
call_stack_t *trav = NULL;
int i = 1;
int ret = -1;
+ /* starts false so the lock-failure path still emits the section header */
+ gf_boolean_t section_added = _gf_false;
if (!call_pool)
return;
ret = TRY_LOCK (&(call_pool->lock));
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call pool"
- " errno: %d", errno);
- return;
- }
+ if (ret)
+ goto out;
gf_proc_dump_add_section("global.callpool");
+ section_added = _gf_true;
gf_proc_dump_write("callpool_address","%p", call_pool);
gf_proc_dump_write("callpool.cnt","%d", call_pool->cnt);
@@ -151,6 +214,17 @@ gf_proc_dump_pending_frames (call_pool_t *call_pool)
i++;
}
UNLOCK (&(call_pool->lock));
+
+ ret = 0;
+out:
+ if (ret) {
+ if (_gf_false == section_added)
+ gf_proc_dump_add_section("global.callpool");
+ gf_proc_dump_write("Unable to dump the callpool",
+ "(Lock acquisition failed) %p",
+ call_pool);
+ }
+ return;
}
void
@@ -277,10 +351,10 @@ gf_proc_dump_call_stack_to_dict (call_stack_t *call_stack,
snprintf (key, sizeof (key), "%s.op", prefix);
if (call_stack->type == GF_OP_TYPE_FOP)
ret = dict_set_str (dict, key,
- gf_fop_list[call_stack->op]);
- else if (call_stack->type == GF_OP_TYPE_MGMT)
- ret = dict_set_str (dict, key,
- gf_mgmt_list[call_stack->op]);
+ (char *)gf_fop_list[call_stack->op]);
+ else
+ ret = dict_set_str (dict, key, "other");
+
if (ret)
return;
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h
index 63307192a..f2d2ef950 100644
--- a/libglusterfs/src/stack.h
+++ b/libglusterfs/src/stack.h
@@ -25,8 +25,8 @@ struct _call_stack_t;
typedef struct _call_stack_t call_stack_t;
struct _call_frame_t;
typedef struct _call_frame_t call_frame_t;
-struct _call_pool_t;
-typedef struct _call_pool_t call_pool_t;
+struct call_pool;
+typedef struct call_pool call_pool_t;
#include <sys/time.h>
@@ -36,6 +36,7 @@ typedef struct _call_pool_t call_pool_t;
#include "common-utils.h"
#include "globals.h"
#include "lkowner.h"
+#include "client_t.h"
#define NFS_PID 1
#define LOW_PRIO_PROC_PID -1
@@ -46,7 +47,7 @@ typedef int32_t (*ret_fn_t) (call_frame_t *frame,
int32_t op_errno,
...);
-struct _call_pool_t {
+struct call_pool {
union {
struct list_head all_frames;
struct {
@@ -82,6 +83,8 @@ struct _call_frame_t {
const char *unwind_to;
};
+#define SMALL_GROUP_COUNT 128
+
struct _call_stack_t {
union {
struct list_head all_frames;
@@ -92,20 +95,24 @@ struct _call_stack_t {
};
call_pool_t *pool;
gf_lock_t stack_lock;
- void *trans;
+ client_t *client;
uint64_t unique;
void *state; /* pointer to request state */
uid_t uid;
gid_t gid;
pid_t pid;
uint16_t ngrps;
- uint32_t groups[GF_MAX_AUX_GROUPS];
+ uint32_t groups_small[SMALL_GROUP_COUNT];
+ uint32_t *groups_large;
+ uint32_t *groups;
gf_lkowner_t lk_owner;
+ glusterfs_ctx_t *ctx;
call_frame_t frames;
int32_t op;
int8_t type;
+ struct timeval tv;
};
@@ -122,11 +129,10 @@ struct _call_stack_t {
struct xlator_fops;
void
-gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops,
- void *fn);
+gf_latency_begin (call_frame_t *frame, void *fn);
void
-gf_update_latency (call_frame_t *frame);
+gf_latency_end (call_frame_t *frame);
static inline void
FRAME_DESTROY (call_frame_t *frame)
@@ -173,6 +179,9 @@ STACK_DESTROY (call_stack_t *stack)
while (stack->frames.next) {
FRAME_DESTROY (stack->frames.next);
}
+
+ GF_FREE (stack->groups_large);
+
mem_put (stack);
if (local)
@@ -236,6 +245,7 @@ STACK_RESET (call_stack_t *stack)
_new->wind_from = __FUNCTION__; \
_new->wind_to = #fn; \
_new->unwind_to = #rfn; \
+ \
LOCK_INIT (&_new->lock); \
LOCK(&frame->root->stack_lock); \
{ \
@@ -249,11 +259,27 @@ STACK_RESET (call_stack_t *stack)
UNLOCK(&frame->root->stack_lock); \
old_THIS = THIS; \
THIS = obj; \
+ if (frame->this->ctx->measure_latency) \
+ gf_latency_begin (_new, fn); \
fn (_new, obj, params); \
THIS = old_THIS; \
} while (0)
+/*
+ * STACK_WIND_TAIL: make a call without switching frames.
+ *
+ * The caller's own 'frame' is handed to 'fn' instead of a newly created
+ * child frame: frame->this is re-pointed at 'obj' and frame->wind_to records
+ * the callee's name.  THIS is switched to 'obj' for the duration of the call
+ * and restored afterwards.  No callback is registered by this macro.
+ *
+ * NOTE(review): 'frame' and 'obj' are expanded more than once, so arguments
+ * with side effects should be avoided.
+ */
+#define STACK_WIND_TAIL(frame, obj, fn, params ...) \
+ do { \
+ xlator_t *old_THIS = NULL; \
+ \
+ frame->this = obj; \
+ frame->wind_to = #fn; \
+ old_THIS = THIS; \
+ THIS = obj; \
+ fn (frame, obj, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+
/* make a call with a cookie */
#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params ...) \
do { \
@@ -288,6 +314,8 @@ STACK_RESET (call_stack_t *stack)
fn##_cbk = rfn; \
old_THIS = THIS; \
THIS = obj; \
+ if (obj->ctx->measure_latency) \
+ gf_latency_begin (_new, fn); \
fn (_new, obj, params); \
THIS = old_THIS; \
} while (0)
@@ -314,6 +342,8 @@ STACK_RESET (call_stack_t *stack)
THIS = _parent->this; \
frame->complete = _gf_true; \
frame->unwind_from = __FUNCTION__; \
+ if (frame->this->ctx->measure_latency) \
+ gf_latency_end (frame); \
fn (_parent, frame->cookie, _parent->this, params); \
THIS = old_THIS; \
} while (0)
@@ -341,11 +371,31 @@ STACK_RESET (call_stack_t *stack)
THIS = _parent->this; \
frame->complete = _gf_true; \
frame->unwind_from = __FUNCTION__; \
+ if (frame->this->ctx->measure_latency) \
+ gf_latency_end (frame); \
fn (_parent, frame->cookie, _parent->this, params); \
THIS = old_THIS; \
} while (0)
+/*
+ * Select storage for 'ngrps' auxiliary group ids on 'stack'.
+ *
+ * Counts up to SMALL_GROUP_COUNT use the embedded groups_small array; larger
+ * counts get a zeroed heap array stored in groups_large (released by
+ * STACK_DESTROY).  On success stack->groups points at the chosen storage and
+ * stack->ngrps holds the count.
+ *
+ * Returns 0 on success, -1 if 'ngrps' is negative or the allocation fails
+ * (in which case stack->groups and stack->ngrps are left untouched).
+ */
+static inline int
+call_stack_alloc_groups (call_stack_t *stack, int ngrps)
+{
+        /* A negative count would satisfy the small-buffer test below and
+         * then wrap to a huge value when stored in the uint16_t ngrps. */
+        if (ngrps < 0)
+                return -1;
+
+        if (ngrps <= SMALL_GROUP_COUNT) {
+                stack->groups = stack->groups_small;
+        } else {
+                stack->groups_large = GF_CALLOC (sizeof (gid_t), ngrps,
+                                                 gf_common_mt_groups_t);
+                if (!stack->groups_large)
+                        return -1;
+                stack->groups = stack->groups_large;
+        }
+
+        stack->ngrps = ngrps;
+
+        return 0;
+}
+
+
static inline call_frame_t *
copy_frame (call_frame_t *frame)
{
@@ -369,14 +419,27 @@ copy_frame (call_frame_t *frame)
newstack->ngrps = oldstack->ngrps;
newstack->op = oldstack->op;
newstack->type = oldstack->type;
+ if (call_stack_alloc_groups (newstack, oldstack->ngrps) != 0) {
+ mem_put (newstack);
+ return NULL;
+ }
memcpy (newstack->groups, oldstack->groups,
- sizeof (gid_t) * GF_MAX_AUX_GROUPS);
+ sizeof (gid_t) * oldstack->ngrps);
newstack->unique = oldstack->unique;
newstack->frames.this = frame->this;
newstack->frames.root = newstack;
newstack->pool = oldstack->pool;
newstack->lk_owner = oldstack->lk_owner;
+ newstack->ctx = oldstack->ctx;
+
+ if (newstack->ctx->measure_latency) {
+ if (gettimeofday (&newstack->tv, NULL) == -1)
+ gf_log ("stack", GF_LOG_ERROR, "gettimeofday () failed."
+ " (%s)", strerror (errno));
+ memcpy (&newstack->frames.begin, &newstack->tv,
+ sizeof (newstack->tv));
+ }
LOCK_INIT (&newstack->frames.lock);
LOCK_INIT (&newstack->stack_lock);
@@ -391,39 +454,9 @@ copy_frame (call_frame_t *frame)
return &newstack->frames;
}
-
-static inline call_frame_t *
-create_frame (xlator_t *xl, call_pool_t *pool)
-{
- call_stack_t *stack = NULL;
-
- if (!xl || !pool) {
- return NULL;
- }
-
- stack = mem_get0 (pool->stack_mem_pool);
- if (!stack)
- return NULL;
-
- stack->pool = pool;
- stack->frames.root = stack;
- stack->frames.this = xl;
-
- LOCK (&pool->lock);
- {
- list_add (&stack->all_frames, &pool->all_frames);
- pool->cnt++;
- }
- UNLOCK (&pool->lock);
-
- LOCK_INIT (&stack->frames.lock);
- LOCK_INIT (&stack->stack_lock);
-
- return &stack->frames;
-}
-
void gf_proc_dump_pending_frames(call_pool_t *call_pool);
void gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool,
dict_t *dict);
+call_frame_t *create_frame (xlator_t *xl, call_pool_t *pool);
gf_boolean_t __is_fuse_call (call_frame_t *frame);
#endif /* _STACK_H */
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index 899d8ef2b..8175faba4 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -53,17 +53,12 @@ gf_proc_dump_unlock (void)
pthread_mutex_unlock (&gf_proc_dump_mutex);
}
-
static int
-gf_proc_dump_open (char *dump_dir, char *brickname)
+gf_proc_dump_open (char *tmpname)
{
- char path[PATH_MAX] = {0,};
int dump_fd = -1;
- snprintf (path, sizeof (path), "%s/%s.%d.dump", (dump_dir ?
- dump_dir : "/tmp"), brickname, getpid());
-
- dump_fd = open (path, O_CREAT|O_RDWR|O_TRUNC|O_APPEND, 0600);
+ dump_fd = mkstemp (tmpname);
if (dump_fd < 0)
return -1;
@@ -71,7 +66,6 @@ gf_proc_dump_open (char *dump_dir, char *brickname)
return 0;
}
-
static void
gf_proc_dump_close (void)
{
@@ -79,6 +73,45 @@ gf_proc_dump_close (void)
gf_dump_fd = -1;
}
+/*
+ * Scan 'dump_options_file' for a "path=<dir>" entry and, if one is found,
+ * store a copy of <dir> in dump_options.dump_path.
+ *
+ * The file is read as whitespace-separated tokens; each token is split on
+ * '='.  Tokens without a key or without a value are skipped, as are keys
+ * other than "path".  Scanning stops at the first "path" entry.
+ *
+ * Returns 0 when a path was stored, -1 otherwise (file could not be opened,
+ * no "path" entry present, or the copy could not be allocated).  The only
+ * caller in this file ignores the result.
+ */
+static int
+gf_proc_dump_set_path (char *dump_options_file)
+{
+        int     ret = -1;
+        FILE   *fp = NULL;
+        char    buf[256];
+        char   *key = NULL, *value = NULL;
+        char   *saveptr = NULL;
+
+        fp = fopen (dump_options_file, "r");
+        if (!fp)
+                goto out;
+
+        /* Width-limit the conversion so an over-long token cannot overrun
+         * buf, and fetch a new token on every iteration: re-parsing the
+         * same buffer would spin forever on any "key=value" whose key is
+         * not "path". */
+        while (fscanf (fp, "%255s", buf) == 1) {
+                key = strtok_r (buf, "=", &saveptr);
+                if (!key)
+                        continue;
+
+                value = strtok_r (NULL, "=", &saveptr);
+                if (!value)
+                        continue;
+
+                if (strcmp (key, "path"))
+                        continue;
+
+                dump_options.dump_path = gf_strdup (value);
+                if (dump_options.dump_path)
+                        ret = 0;
+                break;
+        }
+
+out:
+        if (fp)
+                fclose (fp);
+        return ret;
+}
int
gf_proc_dump_add_section (char *key, ...)
@@ -381,7 +414,7 @@ gf_proc_dump_xlator_info (xlator_t *top)
if (!top)
return;
- ctx = glusterfs_ctx_get ();
+ ctx = top->ctx;
trav = top;
while (trav) {
@@ -395,8 +428,6 @@ gf_proc_dump_xlator_info (xlator_t *top)
(trav->itable)) {
snprintf (itable_key, 1024, "%d.%s.itable",
ctx->graph_id, trav->name);
-
- inode_table_dump (trav->itable, itable_key);
}
if (!trav->dumpops) {
@@ -430,24 +461,20 @@ static void
gf_proc_dump_oldgraph_xlator_info (xlator_t *top)
{
xlator_t *trav = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char itable_key[1024] = {0,};
if (!top)
return;
- ctx = glusterfs_ctx_get ();
-
trav = top;
while (trav) {
gf_proc_dump_xlator_mem_info_only_in_use (trav);
if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
(trav->itable)) {
- snprintf (itable_key, 1024, "%d.%s.itable",
- ctx->graph_id, trav->name);
-
- inode_table_dump (trav->itable, itable_key);
+ /*TODO: dump inode table info if necessary by
+ printing the graph id (taken by glusterfs_cbtx_t)
+ in the key
+ */
}
if (!trav->dumpops) {
@@ -488,6 +515,44 @@ gf_proc_dump_enable_all_options ()
return 0;
}
+/*
+ * Report whether every statedump option is currently switched off.
+ *
+ * Returns _gf_false as soon as any of the global options (mem, iobuf,
+ * callpool) or per-xlator options (priv, inode, fd, inodectx, fdctx,
+ * history) equals _gf_true; returns _gf_true when none of them does.
+ */
+gf_boolean_t
+is_gf_proc_dump_all_disabled ()
+{
+        const gf_dump_xl_options_t *xl = &dump_options.xl_options;
+
+        if (dump_options.dump_mem == _gf_true ||
+            dump_options.dump_iobuf == _gf_true ||
+            dump_options.dump_callpool == _gf_true ||
+            xl->dump_priv == _gf_true ||
+            xl->dump_inode == _gf_true ||
+            xl->dump_fd == _gf_true ||
+            xl->dump_inodectx == _gf_true ||
+            xl->dump_fdctx == _gf_true ||
+            xl->dump_history == _gf_true)
+                return _gf_false;
+
+        return _gf_true;
+}
+
+
+/* These options are dumped by default if the glusterdump.options
+ file exists and it is empty.
+
+ Turns on the memory and call-pool dump options only; it is invoked by
+ gf_proc_dump_options_init() when, after parsing the options file, every
+ option is still disabled.  Always returns 0.
+*/
+static int
+gf_proc_dump_enable_default_options ()
+{
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_true);
+ GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_true);
+
+ return 0;
+}
+
static int
gf_proc_dump_disable_all_options ()
{
@@ -566,28 +631,44 @@ gf_proc_dump_options_init ()
int ret = -1;
FILE *fp = NULL;
char buf[256];
- char dumpbuf[GF_DUMP_MAX_BUF_LEN];
char *key = NULL, *value = NULL;
char *saveptr = NULL;
char dump_option_file[PATH_MAX];
+ /* glusterd will create a file glusterdump.<pid>.options and
+ sets the statedump options for the process and the file is removed
+ after the statedump is taken. Direct issue of SIGUSR1 does not have
+ mechanism for considering the statedump options. So to have a way
+ of configuring the statedump of all the glusterfs processes through
+ both cli command and SIGUSR1, glusterdump.options file is searched
+ and the options mentioned in it are given the higher priority.
+ */
snprintf (dump_option_file, sizeof (dump_option_file),
- "/tmp/glusterdump.%d.options", getpid ());
-
+ DEFAULT_VAR_RUN_DIRECTORY
+ "/glusterdump.options");
fp = fopen (dump_option_file, "r");
-
if (!fp) {
- //ENOENT, return success
- (void) gf_proc_dump_enable_all_options ();
- return 0;
+ snprintf (dump_option_file, sizeof (dump_option_file),
+ DEFAULT_VAR_RUN_DIRECTORY
+ "/glusterdump.%d.options", getpid ());
+
+ fp = fopen (dump_option_file, "r");
+
+ if (!fp) {
+ //ENOENT, return success
+ (void) gf_proc_dump_enable_all_options ();
+ return 0;
+ }
}
(void) gf_proc_dump_disable_all_options ();
+ // swallow the errors if setting statedump file path is failed.
+ ret = gf_proc_dump_set_path (dump_option_file);
+
ret = fscanf (fp, "%s", buf);
while (ret != EOF) {
-
key = strtok_r (buf, "=", &saveptr);
if (!key) {
ret = fscanf (fp, "%s", buf);
@@ -601,28 +682,33 @@ gf_proc_dump_options_init ()
continue;
}
- snprintf (dumpbuf, sizeof (dumpbuf), "[Debug]:key=%s, value=%s\n",key,value);
- ret = write (gf_dump_fd, dumpbuf, strlen (dumpbuf));
-
gf_proc_dump_parse_set_option (key, value);
-
}
+ if (is_gf_proc_dump_all_disabled ())
+ (void) gf_proc_dump_enable_default_options ();
+
+ if (fp)
+ fclose (fp);
+
return 0;
}
void
-gf_proc_dump_info (int signum)
+gf_proc_dump_info (int signum, glusterfs_ctx_t *ctx)
{
- int i = 0;
- int ret = -1;
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *trav = NULL;
- char brick_name[PATH_MAX] = {0,};
+ int i = 0;
+ int ret = -1;
+ glusterfs_graph_t *trav = NULL;
+ char brick_name[PATH_MAX] = {0,};
+ char timestr[256] = {0,};
+ char sign_string[512] = {0,};
+ char tmp_dump_name[PATH_MAX] = {0,};
+ char path[PATH_MAX] = {0,};
+ struct timeval tv = {0,};
gf_proc_dump_lock ();
- ctx = glusterfs_ctx_get ();
if (!ctx)
goto out;
@@ -631,14 +717,44 @@ gf_proc_dump_info (int signum)
} else
strncpy (brick_name, "glusterdump", sizeof (brick_name));
- ret = gf_proc_dump_open (ctx->statedump_path, brick_name);
+ ret = gf_proc_dump_options_init ();
if (ret < 0)
goto out;
- ret = gf_proc_dump_options_init ();
+ snprintf (path, sizeof (path), "%s/%s.%d.dump.%"PRIu64,
+ ((dump_options.dump_path != NULL)?dump_options.dump_path:
+ ((ctx->statedump_path != NULL)?ctx->statedump_path:
+ DEFAULT_VAR_RUN_DIRECTORY)), brick_name, getpid(),
+ (uint64_t) time (NULL));
+
+ snprintf (tmp_dump_name, PATH_MAX, "%s/dumpXXXXXX",
+ ((dump_options.dump_path != NULL)?dump_options.dump_path:
+ ((ctx->statedump_path != NULL)?ctx->statedump_path:
+ DEFAULT_VAR_RUN_DIRECTORY)));
+
+ ret = gf_proc_dump_open (tmp_dump_name);
if (ret < 0)
goto out;
+ //continue even though gettimeofday() has failed
+ ret = gettimeofday (&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ }
+
+ snprintf (sign_string, sizeof (sign_string), "DUMP-START-TIME: %s\n",
+ timestr);
+
+ //swallow the errors of write for start and end marker
+ ret = write (gf_dump_fd, sign_string, strlen (sign_string));
+
+ memset (sign_string, 0, sizeof (sign_string));
+ memset (timestr, 0, sizeof (timestr));
+ memset (&tv, 0, sizeof (tv));
+
if (GF_PROC_DUMP_IS_OPTION_ENABLED (mem)) {
gf_proc_dump_mem_info ();
gf_proc_dump_mempool_info (ctx);
@@ -670,9 +786,24 @@ gf_proc_dump_info (int signum)
i++;
}
+ ret = gettimeofday (&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
+ snprintf (timestr + strlen (timestr),
+ sizeof timestr - strlen (timestr),
+ ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ }
+
+ snprintf (sign_string, sizeof (sign_string), "\nDUMP-END-TIME: %s",
+ timestr);
+ ret = write (gf_dump_fd, sign_string, strlen (sign_string));
+
out:
if (gf_dump_fd != -1)
gf_proc_dump_close ();
+ rename (tmp_dump_name, path);
+ GF_FREE (dump_options.dump_path);
+ dump_options.dump_path = NULL;
gf_proc_dump_unlock ();
return;
diff --git a/libglusterfs/src/statedump.h b/libglusterfs/src/statedump.h
index dc56bda0c..8342b120a 100644
--- a/libglusterfs/src/statedump.h
+++ b/libglusterfs/src/statedump.h
@@ -31,6 +31,7 @@ typedef struct gf_dump_options_ {
gf_boolean_t dump_iobuf;
gf_boolean_t dump_callpool;
gf_dump_xl_options_t xl_options; //options for all xlators
+ char *dump_path;
} gf_dump_options_t;
extern gf_dump_options_t dump_options;
@@ -55,13 +56,21 @@ void _gf_proc_dump_build_key (char *key, const char *prefix, char *fmt,...)
#define GF_PROC_DUMP_SET_OPTION(opt,val) opt = val
+/*
+ * If 'option_dump' is enabled (== _gf_true), set 'var' to _gf_false and jump
+ * to 'label'; otherwise do nothing.
+ *
+ * Written as do { ... } while (0) WITHOUT a trailing semicolon so that the
+ * invocation "GF_CHECK_DUMP_OPTION_ENABLED (...);" is exactly one statement
+ * and stays safe inside an unbraced if/else.
+ */
+#define GF_CHECK_DUMP_OPTION_ENABLED(option_dump, var, label)   \
+        do {                                                    \
+                if ((option_dump) == _gf_true) {                \
+                        (var) = _gf_false;                      \
+                        goto label;                             \
+                }                                               \
+        } while (0)
+
void gf_proc_dump_init();
void gf_proc_dump_fini(void);
void gf_proc_dump_cleanup(void);
-void gf_proc_dump_info(int signum);
+void gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx);
int gf_proc_dump_add_section(char *key,...);
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
new file mode 100644
index 000000000..1e6601837
--- /dev/null
+++ b/libglusterfs/src/store.c
@@ -0,0 +1,709 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <inttypes.h>
+#include <libgen.h>
+
+#include "glusterfs.h"
+#include "store.h"
+#include "dict.h"
+#include "xlator.h"
+
+/*
+ * Create directory 'path' with mode 0777.
+ *
+ * An already existing path (EEXIST) counts as success.  Returns 0 on
+ * success, -1 after logging any other mkdir() failure.
+ */
+int32_t
+gf_store_mkdir (char *path)
+{
+        if (mkdir (path, 0777) != -1)
+                return 0;
+
+        if (errno == EEXIST)
+                return 0;
+
+        gf_log ("", GF_LOG_ERROR, "mkdir() failed on path %s,"
+                "errno: %s", path, strerror (errno));
+        return -1;
+}
+
+/*
+ * Make sure '*shandle' refers to a store handle for 'path'.
+ *
+ * When '*shandle' is already set nothing happens and 0 is returned;
+ * otherwise a new handle is created through gf_store_handle_new() and its
+ * result is returned (a failure is logged).
+ */
+int32_t
+gf_store_handle_create_on_absence (gf_store_handle_t **shandle,
+                                   char *path)
+{
+        int32_t rc = 0;
+
+        GF_ASSERT (shandle);
+
+        if (*shandle != NULL)
+                return 0;
+
+        rc = gf_store_handle_new (path, shandle);
+        if (rc != 0)
+                gf_log ("", GF_LOG_ERROR, "Unable to create store"
+                        " handle for path: %s", path);
+
+        return rc;
+}
+
+/*
+ * Open (creating or truncating) the scratch file "<shandle->path>.tmp" for
+ * synchronous read/write access with mode 0600.
+ *
+ * Returns the descriptor obtained from open(); a result <= 0 is logged as a
+ * failure and still returned to the caller as-is.
+ */
+int32_t
+gf_store_mkstemp (gf_store_handle_t *shandle)
+{
+        char    scratch[PATH_MAX] = {0,};
+        int     tmpfd             = -1;
+
+        GF_ASSERT (shandle);
+        GF_ASSERT (shandle->path);
+
+        snprintf (scratch, sizeof (scratch), "%s.tmp", shandle->path);
+
+        tmpfd = open (scratch, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0600);
+        if (tmpfd > 0)
+                return tmpfd;
+
+        gf_log ("", GF_LOG_ERROR, "Failed to open %s, error: %s",
+                scratch, strerror (errno));
+        return tmpfd;
+}
+
+/*
+ * fsync() the directory that contains 'path', so that a directory-entry
+ * change made for 'path' (create, rename) reaches stable storage.
+ *
+ * 'path' itself is not modified: dirname() operates on a private copy.
+ *
+ * Returns 0 only when the directory was opened, synced and closed without
+ * error; -1 otherwise.  Every failure is logged.
+ */
+int
+gf_store_sync_direntry (char *path)
+{
+        int             ret     = -1;
+        int             dirfd   = -1;
+        char            *dir    = NULL;
+        char            *pdir   = NULL;
+        xlator_t        *this = NULL;
+
+        this = THIS;
+
+        dir = gf_strdup (path);
+        if (!dir)
+                goto out;
+
+        pdir = dirname (dir);
+        dirfd = open (pdir, O_RDONLY);
+        if (dirfd == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to open directory "
+                        "%s, due to %s", pdir, strerror (errno));
+                goto out;
+        }
+
+        if (fsync (dirfd)) {
+                gf_log (this->name, GF_LOG_ERROR, "Failed to fsync %s, due to "
+                        "%s", pdir, strerror (errno));
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (dirfd >= 0) {
+                /* Do not assign close()'s result to ret directly: a
+                 * successful close must not mask an earlier fsync failure. */
+                if (close (dirfd)) {
+                        gf_log (this->name, GF_LOG_ERROR, "Failed to close "
+                                "%s, due to %s", pdir, strerror (errno));
+                        ret = -1;
+                }
+        }
+
+        /* pdir may point into dir, so dir is released only after the last
+         * use of pdir above. */
+        GF_FREE (dir);
+
+        return ret;
+}
+
+/*
+ * Move "<shandle->path>.tmp" over shandle->path and then sync the
+ * containing directory.
+ *
+ * Returns rename()'s result (after logging) when the rename fails, otherwise
+ * the result of gf_store_sync_direntry().
+ */
+int32_t
+gf_store_rename_tmppath (gf_store_handle_t *shandle)
+{
+        char    scratch[PATH_MAX] = {0,};
+        int32_t rc                = -1;
+
+        GF_ASSERT (shandle);
+        GF_ASSERT (shandle->path);
+
+        snprintf (scratch, sizeof (scratch), "%s.tmp", shandle->path);
+
+        rc = rename (scratch, shandle->path);
+        if (rc != 0) {
+                gf_log (THIS->name, GF_LOG_ERROR, "Failed to rename %s to %s, "
+                        "error: %s", scratch, shandle->path, strerror (errno));
+                return rc;
+        }
+
+        /* The scratch file lives in the same directory as the target, so
+         * syncing its parent covers the rename. */
+        return gf_store_sync_direntry (scratch);
+}
+
+/*
+ * Remove the scratch file "<shandle->path>.tmp".
+ *
+ * A scratch file that does not exist (ENOENT) is not an error.  Returns 0 on
+ * success or when the file was already absent; otherwise logs the failure
+ * and returns unlink()'s non-zero result.
+ */
+int32_t
+gf_store_unlink_tmppath (gf_store_handle_t *shandle)
+{
+        int32_t         ret = -1;
+        char            tmppath[PATH_MAX] = {0,};
+
+        GF_ASSERT (shandle);
+        GF_ASSERT (shandle->path);
+
+        snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
+        ret = unlink (tmppath);
+        if (ret && (errno != ENOENT)) {
+                /* The operation here is an unlink, not a move; say so. */
+                gf_log ("", GF_LOG_ERROR, "Failed to unlink %s, error: %s",
+                        tmppath, strerror (errno));
+        } else {
+                ret = 0;
+        }
+
+        return ret;
+}
+
+/*
+ * Read the next line from 'file' into 'str' and split it at '=' into a key
+ * and a value.
+ *
+ * On success returns 0, sets *store_errno to GD_STORE_SUCCESS and points
+ * *iter_key / *iter_val INTO 'str' (nothing is allocated; the tokens are
+ * valid only as long as 'str' is left untouched).
+ *
+ * On failure returns -1 and sets *store_errno to:
+ *   GD_STORE_EOF        - fgets() returned nothing, or end-of-file was
+ *                         reached while reading the line
+ *   GD_STORE_KEY_NULL   - the line holds no key token
+ *   GD_STORE_VALUE_NULL - the line holds no value token
+ *
+ * NOTE(review): up to PATH_MAX bytes are requested from fgets() regardless
+ * of how large the caller's 'str' buffer is - callers must size it
+ * accordingly.
+ */
+int
+gf_store_read_and_tokenize (FILE *file, char *str, char **iter_key,
+                            char **iter_val, gf_store_op_errno_t *store_errno)
+{
+        int32_t     ret = -1;
+        char        *savetok = NULL;
+        char        *key = NULL;
+        char        *value = NULL;
+        char        *temp = NULL;
+        size_t       str_len = 0;
+
+        GF_ASSERT (file);
+        GF_ASSERT (str);
+        GF_ASSERT (iter_key);
+        GF_ASSERT (iter_val);
+        GF_ASSERT (store_errno);
+
+        temp = fgets (str, PATH_MAX, file);
+        if (temp == NULL || feof (file)) {
+                ret = -1;
+                *store_errno = GD_STORE_EOF;
+                goto out;
+        }
+
+        /* fgets keeps the "\n"; drop it.  Only touch the last byte when it
+         * really is a newline: an empty string would index str[-1], and a
+         * line cut short by the size limit would lose a real character. */
+        str_len = strlen (str);
+        if (str_len > 0 && str[str_len - 1] == '\n')
+                str[str_len - 1] = '\0';
+
+        key = strtok_r (str, "=", &savetok);
+        if (!key) {
+                ret = -1;
+                *store_errno = GD_STORE_KEY_NULL;
+                goto out;
+        }
+
+        value = strtok_r (NULL, "=", &savetok);
+        if (!value) {
+                ret = -1;
+                *store_errno = GD_STORE_VALUE_NULL;
+                goto out;
+        }
+
+        *iter_key = key;
+        *iter_val = value;
+        *store_errno = GD_STORE_SUCCESS;
+        ret = 0;
+out:
+        return ret;
+}
+
+/*
+ * Look up 'key' in the store file behind 'handle' and return a newly
+ * allocated copy of its value in '*value' (caller frees with GF_FREE).
+ *
+ * When the handle is not locked (handle->locked == F_ULOCK) the file is
+ * opened for this call and closed again before returning; when it is locked
+ * the descriptor kept open for lockf() is rewound and reused.  The stdio
+ * stream in handle->read is always closed before returning.
+ *
+ * Returns 0 when the key was found and copied, -1 otherwise (open/stat
+ * failure, allocation failure, key not present, malformed line).
+ */
+int32_t
+gf_store_retrieve_value (gf_store_handle_t *handle, char *key, char **value)
+{
+        int32_t         ret = -1;
+        int             dup_fd = -1;
+        int             saved_errno = 0;
+        char            *scan_str = NULL;
+        char            *iter_key = NULL;
+        char            *iter_val = NULL;
+        struct stat     st        = {0,};
+        gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+        GF_ASSERT (handle);
+
+        if (handle->locked == F_ULOCK)
+                /* no locking is used handle->fd gets closed() after usage */
+                handle->fd = open (handle->path, O_RDWR);
+        else
+                /* handle->fd is valid already, kept open for lockf() */
+                lseek (handle->fd, 0, SEEK_SET);
+
+        if (handle->fd == -1) {
+                gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s",
+                        handle->path, strerror (errno));
+                goto out;
+        }
+
+        if (!handle->read) {
+                /* The stream gets its own descriptor so that fclose() does
+                 * not close handle->fd.  If fdopen() fails the duplicate is
+                 * still ours and must be closed here, keeping errno intact
+                 * for the log message below. */
+                dup_fd = dup (handle->fd);
+                if (dup_fd >= 0) {
+                        handle->read = fdopen (dup_fd, "r");
+                        if (!handle->read) {
+                                saved_errno = errno;
+                                close (dup_fd);
+                                errno = saved_errno;
+                        }
+                }
+        } else {
+                fseek (handle->read, 0, SEEK_SET);
+        }
+
+        if (!handle->read) {
+                gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s",
+                        handle->path, strerror (errno));
+                goto out;
+        }
+
+        ret = fstat (handle->fd, &st);
+        if (ret < 0) {
+                gf_log ("", GF_LOG_WARNING, "stat on file %s failed",
+                        handle->path);
+                ret = -1;
+                store_errno = GD_STORE_STAT_FAILED;
+                goto out;
+        }
+
+        /* "st.st_size + 1" is used as we are fetching each
+         * line of a file using fgets, fgets will append "\0"
+         * to the end of the string
+         */
+        scan_str = GF_CALLOC (1, st.st_size + 1,
+                              gf_common_mt_char);
+
+        if (scan_str == NULL) {
+                ret = -1;
+                store_errno = GD_STORE_ENOMEM;
+                goto out;
+        }
+
+        do {
+                ret = gf_store_read_and_tokenize (handle->read, scan_str,
+                                                  &iter_key, &iter_val,
+                                                  &store_errno);
+                if (ret < 0) {
+                        gf_log ("", GF_LOG_TRACE, "error while reading key "
+                                "'%s': %s", key,
+                                gf_store_strerror (store_errno));
+                        goto out;
+                }
+
+                gf_log ("", GF_LOG_TRACE, "key %s read", iter_key);
+
+                if (!strcmp (key, iter_key)) {
+                        gf_log ("", GF_LOG_DEBUG, "key %s found", key);
+                        ret = 0;
+                        if (iter_val) {
+                                *value = gf_strdup (iter_val);
+                                /* A failed copy must not be reported as a
+                                 * successful lookup. */
+                                if (!*value) {
+                                        ret = -1;
+                                        store_errno = GD_STORE_ENOMEM;
+                                }
+                        }
+                        goto out;
+                }
+        } while (1);
+out:
+        if (handle->read) {
+                fclose (handle->read);
+                handle->read = NULL;
+        }
+
+        if (handle->fd > 0 && handle->locked == F_ULOCK) {
+                /* only invalidate handle->fd if not locked */
+                close (handle->fd);
+        }
+
+        GF_FREE (scan_str);
+
+        return ret;
+}
+
+/*
+ * Append the line "key=value\n" to the file open on descriptor 'fd'.
+ *
+ * The write goes through a stdio stream opened on a duplicate of 'fd', so
+ * 'fd' itself stays open for the caller.
+ *
+ * Returns 0 once the line has been written and flushed, -1 on any failure
+ * (dup, fdopen, fprintf or fflush).
+ */
+int32_t
+gf_store_save_value (int fd, char *key, char *value)
+{
+        int32_t         ret = -1;
+        int             dup_fd = -1;
+        FILE           *fp  = NULL;
+
+        GF_ASSERT (fd > 0);
+        GF_ASSERT (key);
+        GF_ASSERT (value);
+
+        dup_fd = dup (fd);
+        if (dup_fd == -1)
+                goto out;
+
+        fp = fdopen (dup_fd, "a+");
+        if (fp == NULL) {
+                gf_log ("", GF_LOG_WARNING, "fdopen failed.");
+                /* No stream took ownership of the duplicate; close it here
+                 * or it leaks. */
+                close (dup_fd);
+                ret = -1;
+                goto out;
+        }
+
+        ret = fprintf (fp, "%s=%s\n", key, value);
+        if (ret < 0) {
+                gf_log ("", GF_LOG_WARNING, "Unable to store key: %s,"
+                        "value: %s, error: %s", key, value,
+                        strerror (errno));
+                ret = -1;
+                goto out;
+        }
+
+        /* fflush() reports failure through its return value; feof() says
+         * nothing about whether the write succeeded. */
+        ret = fflush (fp);
+        if (ret) {
+                gf_log ("", GF_LOG_WARNING,
+                        "fflush failed, error: %s",
+                        strerror (errno));
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (fp)
+                fclose (fp);
+
+        gf_log ("", GF_LOG_DEBUG, "returning: %d", ret);
+        return ret;
+}
+
+/*
+ * Allocate a store handle for 'path' and hand it back through '*handle'.
+ *
+ * The file is opened once with O_CREAT so that it exists (mode 0600), its
+ * parent directory is synced, and the descriptor is closed again; the
+ * handle only remembers a private copy of the path and starts out unlocked
+ * (F_ULOCK).
+ *
+ * Returns 0 on success.  On failure the partially built handle is released
+ * and a non-zero value is returned; '*handle' is not written.
+ */
+int32_t
+gf_store_handle_new (char *path, gf_store_handle_t **handle)
+{
+        gf_store_handle_t *new_handle = NULL;
+        char              *path_copy  = NULL;
+        int                probe_fd   = -1;
+        int32_t            ret        = -1;
+
+        new_handle = GF_CALLOC (1, sizeof (*new_handle),
+                                gf_common_mt_store_handle_t);
+        if (new_handle == NULL)
+                goto done;
+
+        path_copy = gf_strdup (path);
+        if (path_copy == NULL)
+                goto done;
+
+        probe_fd = open (path, O_RDWR | O_CREAT | O_APPEND, 0600);
+        if (probe_fd <= 0) {
+                gf_log ("", GF_LOG_ERROR, "Failed to open file: %s, error: %s",
+                        path, strerror (errno));
+                goto done;
+        }
+
+        ret = gf_store_sync_direntry (path_copy);
+        if (ret != 0)
+                goto done;
+
+        /* ret is 0 from here on */
+        new_handle->path   = path_copy;
+        new_handle->locked = F_ULOCK;
+        *handle            = new_handle;
+
+done:
+        if (probe_fd > 0)
+                close (probe_fd);
+
+        if (ret == -1) {
+                GF_FREE (path_copy);
+                GF_FREE (new_handle);
+        }
+
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+}
+
+/*
+ * Build a store handle for a file that must already exist.
+ *
+ * 'path' is checked with stat() first; if that fails the error is logged
+ * and returned.  Otherwise the handle is created with gf_store_handle_new()
+ * and its result is returned.
+ */
+int
+gf_store_handle_retrieve (char *path, gf_store_handle_t **handle)
+{
+        struct stat sb = {0};
+        int32_t     rc = -1;
+
+        rc = stat (path, &sb);
+        if (rc == 0)
+                rc = gf_store_handle_new (path, handle);
+        else
+                gf_log ("", GF_LOG_ERROR, "Path corresponding to "
+                        "%s, returned error: (%s)",
+                        path, strerror (errno));
+
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", rc);
+        return rc;
+}
+
+/*
+ * Release a store handle: frees the stored path and the handle itself.
+ * A NULL handle is accepted.  Always returns 0.
+ */
+int32_t
+gf_store_handle_destroy (gf_store_handle_t *handle)
+{
+        int32_t rc = 0;
+
+        if (handle != NULL) {
+                GF_FREE (handle->path);
+                GF_FREE (handle);
+        }
+
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", rc);
+
+        return rc;
+}
+
+/*
+ * Create an iterator over the key/value lines of the file behind 'shandle'.
+ *
+ * The file is opened read-only and the iterator keeps both the stream and a
+ * (possibly truncated, always NUL-terminated) copy of the path.
+ *
+ * Returns 0 and stores the iterator in '*iter' on success; returns -1 and
+ * leaves '*iter' untouched when the file cannot be opened or memory runs
+ * out.
+ */
+int32_t
+gf_store_iter_new (gf_store_handle_t *shandle, gf_store_iter_t **iter)
+{
+        gf_store_iter_t *it     = NULL;
+        FILE            *stream = NULL;
+        int32_t          ret    = -1;
+
+        GF_ASSERT (shandle);
+        GF_ASSERT (iter);
+
+        stream = fopen (shandle->path, "r");
+        if (stream == NULL) {
+                gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d",
+                        shandle->path, errno);
+                goto done;
+        }
+
+        it = GF_CALLOC (1, sizeof (*it), gf_common_mt_store_iter_t);
+        if (it == NULL) {
+                fclose (stream);
+                goto done;
+        }
+
+        strncpy (it->filepath, shandle->path, sizeof (it->filepath));
+        it->filepath[sizeof (it->filepath) - 1] = 0;
+        it->file = stream;
+
+        *iter = it;
+        ret = 0;
+
+done:
+        gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+        return ret;
+}
+
+/*
+ * Check that a key/value pair read from 'storepath' is complete.
+ *
+ * Returns 0 with *op_errno = GD_STORE_SUCCESS when both are non-NULL.
+ * Otherwise logs which part is missing, sets *op_errno to
+ * GD_STORE_KEY_VALUE_NULL, GD_STORE_KEY_NULL or GD_STORE_VALUE_NULL and
+ * returns -1.
+ */
+int32_t
+gf_store_validate_key_value (char *storepath, char *key, char *val,
+                             gf_store_op_errno_t *op_errno)
+{
+        GF_ASSERT (op_errno);
+        GF_ASSERT (storepath);
+
+        if (key != NULL && val != NULL) {
+                *op_errno = GD_STORE_SUCCESS;
+                return 0;
+        }
+
+        if (key == NULL && val == NULL) {
+                gf_log ("", GF_LOG_ERROR, "Glusterd store may be corrupted, "
+                        "Invalid key and value (null) in %s", storepath);
+                *op_errno = GD_STORE_KEY_VALUE_NULL;
+        } else if (key == NULL) {
+                gf_log ("", GF_LOG_ERROR, "Glusterd store may be corrupted, "
+                        "Invalid key (null) in %s", storepath);
+                *op_errno = GD_STORE_KEY_NULL;
+        } else {
+                gf_log ("", GF_LOG_ERROR, "Glusterd store may be corrupted, "
+                        "Invalid value (null) for key %s in %s", key,
+                        storepath);
+                *op_errno = GD_STORE_VALUE_NULL;
+        }
+
+        return -1;
+}
+
+/*
+ * Fetch the next key/value pair from the iterator.
+ *
+ * On success returns 0 and stores newly allocated copies of the key and the
+ * value in '*key' and '*value' (the caller releases both with GF_FREE).
+ * On failure returns non-zero, and '*key' / '*value' are passed to GF_FREE
+ * and reset to NULL.  If 'op_errno' is non-NULL it receives the store error
+ * code in either case.
+ *
+ * NOTE(review): because the failure path frees whatever '*key' and '*value'
+ * hold - including on failures that happen before this function assigns
+ * them - callers should pass in pointers initialised to NULL.
+ */
+int32_t
+gf_store_iter_get_next (gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno)
+{
+ int32_t ret = -1;
+ char *scan_str = NULL;
+ char *iter_key = NULL;
+ char *iter_val = NULL;
+ struct stat st = {0,};
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT (iter);
+ GF_ASSERT (key);
+ GF_ASSERT (value);
+
+ /* the file size (looked up by path on every call) sizes the line buffer */
+ ret = stat (iter->filepath, &st);
+ if (ret < 0) {
+ gf_log ("", GF_LOG_WARNING, "stat on file failed");
+ ret = -1;
+ store_errno = GD_STORE_STAT_FAILED;
+ goto out;
+ }
+
+ /* "st.st_size + 1" is used as we are fetching each
+ * line of a file using fgets, fgets will append "\0"
+ * to the end of the string
+ */
+ scan_str = GF_CALLOC (1, st.st_size + 1,
+ gf_common_mt_char);
+ if (!scan_str) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+
+ /* iter_key / iter_val point into scan_str after this call */
+ ret = gf_store_read_and_tokenize (iter->file, scan_str,
+ &iter_key, &iter_val,
+ &store_errno);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = gf_store_validate_key_value (iter->filepath, iter_key,
+ iter_val, &store_errno);
+ if (ret)
+ goto out;
+
+ /* copy the tokens out: scan_str is released before returning */
+ *key = gf_strdup (iter_key);
+ if (!*key) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ *value = gf_strdup (iter_val);
+ if (!*value) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ GF_FREE (scan_str);
+ if (ret) {
+ GF_FREE (*key);
+ GF_FREE (*value);
+ *key = NULL;
+ *value = NULL;
+ }
+ if (op_errno)
+ *op_errno = store_errno;
+
+ gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ return ret;
+}
+
+/*
+ * Advance the iterator until an entry is found whose key starts with 'key'
+ * (the comparison covers only the first strlen (key) characters).
+ *
+ * On a match returns 0 and hands ownership of the value copy to the caller
+ * through '*value'.  If the iterator runs out or fails first, the non-zero
+ * result of gf_store_iter_get_next() is returned and '*value' is untouched.
+ */
+int32_t
+gf_store_iter_get_matching (gf_store_iter_t *iter, char *key, char **value)
+{
+        char    *cur_key   = NULL;
+        char    *cur_value = NULL;
+        int32_t  rc        = -1;
+
+        for (;;) {
+                rc = gf_store_iter_get_next (iter, &cur_key, &cur_value,
+                                             NULL);
+                if (rc)
+                        break;
+
+                if (!strncmp (key, cur_key, strlen (key))) {
+                        *value = cur_value;
+                        GF_FREE (cur_key);
+                        break;
+                }
+
+                /* not the entry we want: drop both copies and move on */
+                GF_FREE (cur_key);
+                cur_key = NULL;
+                GF_FREE (cur_value);
+                cur_value = NULL;
+        }
+
+        return rc;
+}
+
+/*
+ * Close the iterator's stream and free the iterator.
+ *
+ * A NULL iterator is accepted and yields 0.  Otherwise returns fclose()'s
+ * result; a close failure is logged but the iterator is freed regardless.
+ */
+int32_t
+gf_store_iter_destroy (gf_store_iter_t *iter)
+{
+        int32_t rc = -1;
+
+        if (iter == NULL)
+                return 0;
+
+        /* gf_store_iter_new will not return a valid iter object with iter->file
+         * being NULL*/
+        rc = fclose (iter->file);
+        if (rc != 0)
+                gf_log ("", GF_LOG_ERROR, "Unable to close file: %s, ret: %d, "
+                        "errno: %d" ,iter->filepath, rc, errno);
+
+        GF_FREE (iter);
+        return rc;
+}
+
+/*
+ * Map a store error code to a static, human-readable string.
+ *
+ * Every enumerator of gf_store_op_errno_t has its own text; any other value
+ * yields "Invalid errno".  The returned string must not be modified or
+ * freed.
+ */
+char*
+gf_store_strerror (gf_store_op_errno_t op_errno)
+{
+        switch (op_errno) {
+        case GD_STORE_SUCCESS:
+                return "Success";
+        case GD_STORE_KEY_NULL:
+                return "Invalid Key";
+        case GD_STORE_VALUE_NULL:
+                return "Invalid Value";
+        case GD_STORE_KEY_VALUE_NULL:
+                return "Invalid Key and Value";
+        case GD_STORE_EOF:
+                return "No data";
+        case GD_STORE_ENOMEM:
+                return "No memory";
+        case GD_STORE_STAT_FAILED:
+                /* set by the stat()/fstat() failure paths in this file */
+                return "Failed to stat";
+        default:
+                break;
+        }
+        return "Invalid errno";
+}
+
+/*
+ * Take an exclusive lockf() lock on the store file behind 'sh'.
+ *
+ * The file is opened read/write and the descriptor is kept in sh->fd for as
+ * long as the lock is held; sh->locked becomes F_LOCK.  The handle must be
+ * unlocked on entry.
+ *
+ * Returns 0 on success.  On failure (-1 from open, or lockf()'s non-zero
+ * result) the handle stays unlocked and no descriptor is left open.
+ */
+int
+gf_store_lock (gf_store_handle_t *sh)
+{
+        int                     ret;
+
+        GF_ASSERT (sh);
+        GF_ASSERT (sh->path);
+        GF_ASSERT (sh->locked == F_ULOCK);
+
+        sh->fd = open (sh->path, O_RDWR);
+        if (sh->fd == -1) {
+                gf_log ("", GF_LOG_ERROR, "Failed to open '%s': %s", sh->path,
+                        strerror (errno));
+                return -1;
+        }
+
+        ret = lockf (sh->fd, F_LOCK, 0);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Failed to gain lock on '%s': %s",
+                        sh->path, strerror (errno));
+                /* The handle remains unlocked, so nothing would ever close
+                 * this descriptor; release it now. */
+                close (sh->fd);
+                sh->fd = -1;
+        } else {
+                /* sh->locked is protected by the lockf(sh->fd) above */
+                sh->locked = F_LOCK;
+        }
+
+        return ret;
+}
+
+/*
+ * Release the lock taken by gf_store_lock().
+ *
+ * The handle must currently be locked.  The locked flag is reset to F_ULOCK
+ * first, while the lockf() lock is still held; then the lock is dropped and
+ * the descriptor is closed.  sh->fd is not reset after the close.
+ */
+void
+gf_store_unlock (gf_store_handle_t *sh)
+{
+ GF_ASSERT (sh);
+ GF_ASSERT (sh->locked == F_LOCK);
+
+ sh->locked = F_ULOCK;
+ lockf (sh->fd, F_ULOCK, 0);
+ close (sh->fd);
+}
+
+/*
+ * Tell whether this handle currently records a lock taken through
+ * gf_store_lock(): returns 1 when sh->locked is F_LOCK, 0 otherwise.
+ * Only the handle's own flag is consulted; no system call is made.
+ */
+int
+gf_store_locked_local (gf_store_handle_t *sh)
+{
+ GF_ASSERT (sh);
+
+ return (sh->locked == F_LOCK);
+}
diff --git a/libglusterfs/src/store.h b/libglusterfs/src/store.h
new file mode 100644
index 000000000..337103ff7
--- /dev/null
+++ b/libglusterfs/src/store.h
@@ -0,0 +1,112 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_STORE_H_
+#define _GLUSTERD_STORE_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+
+/* Handle for one key=value store file. */
+struct gf_store_handle_ {
+ char *path; /* heap copy of the file path; freed by gf_store_handle_destroy() */
+ int fd; /* descriptor: opened per call by gf_store_retrieve_value() when
+ unlocked, or held open by gf_store_lock() while locked */
+ FILE *read; /* stdio stream used by gf_store_retrieve_value(); closed and
+ reset to NULL before that function returns */
+ int locked; /* state of lockf() */
+};
+
+typedef struct gf_store_handle_ gf_store_handle_t;
+
+/* Cursor over the lines of a store file, created by gf_store_iter_new(). */
+struct gf_store_iter_ {
+ FILE *file; /* read-only stream; closed by gf_store_iter_destroy() */
+ char filepath[PATH_MAX]; /* NUL-terminated copy of the handle's path */
+};
+
+typedef struct gf_store_iter_ gf_store_iter_t;
+
+/* Result codes reported by the store routines; see gf_store_strerror(). */
+typedef enum {
+ GD_STORE_SUCCESS,
+ GD_STORE_KEY_NULL, /* line has no key */
+ GD_STORE_VALUE_NULL, /* line has no value */
+ GD_STORE_KEY_VALUE_NULL, /* neither key nor value */
+ GD_STORE_EOF, /* nothing (more) to read */
+ GD_STORE_ENOMEM, /* allocation failed */
+ GD_STORE_STAT_FAILED /* stat()/fstat() on the store file failed */
+} gf_store_op_errno_t;
+
+int32_t
+gf_store_mkdir (char *path);
+
+int32_t
+gf_store_handle_create_on_absence (gf_store_handle_t **shandle, char *path);
+
+int32_t
+gf_store_mkstemp (gf_store_handle_t *shandle);
+
+int
+gf_store_sync_direntry (char *path);
+
+int32_t
+gf_store_rename_tmppath (gf_store_handle_t *shandle);
+
+int32_t
+gf_store_unlink_tmppath (gf_store_handle_t *shandle);
+
+int
+gf_store_read_and_tokenize (FILE *file, char *str, char **iter_key,
+ char **iter_val, gf_store_op_errno_t *store_errno);
+
+int32_t
+gf_store_retrieve_value (gf_store_handle_t *handle, char *key, char **value);
+
+int32_t
+gf_store_save_value (int fd, char *key, char *value);
+
+int32_t
+gf_store_handle_new (char *path, gf_store_handle_t **handle);
+
+int
+gf_store_handle_retrieve (char *path, gf_store_handle_t **handle);
+
+int32_t
+gf_store_handle_destroy (gf_store_handle_t *handle);
+
+int32_t
+gf_store_iter_new (gf_store_handle_t *shandle, gf_store_iter_t **iter);
+
+int32_t
+gf_store_validate_key_value (char *storepath, char *key, char *val,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_next (gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_matching (gf_store_iter_t *iter, char *key, char **value);
+
+int32_t
+gf_store_iter_destroy (gf_store_iter_t *iter);
+
+char*
+gf_store_strerror (gf_store_op_errno_t op_errno);
+
+int
+gf_store_lock (gf_store_handle_t *sh);
+
+void
+gf_store_unlock (gf_store_handle_t *sh);
+
+int
+gf_store_locked_local (gf_store_handle_t *sh);
+
+#endif
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index c84832dfb..c1620bb70 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -15,6 +15,160 @@
#include "syncop.h"
+/*
+ * Record the uid pointed to by @uid in the calling thread's syncop context,
+ * creating and registering that context on first use.
+ *
+ * Returns 0 on success.  Returns -1 with errno = EINVAL when @uid is NULL,
+ * -1 when the context cannot be allocated, or the non-zero result of
+ * syncopctx_setctx() when registering a fresh context fails.
+ */
+int
+syncopctx_setfsuid (void *uid)
+{
+        struct syncopctx *ctx = NULL;
+        int               rc  = 0;
+
+        if (uid == NULL) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        ctx = syncopctx_getctx ();
+        if (ctx == NULL) {
+                /* first call on this thread: allocate and register */
+                ctx = GF_CALLOC (1, sizeof (*ctx), gf_common_mt_syncopctx);
+                if (ctx == NULL)
+                        return -1;
+
+                rc = syncopctx_setctx (ctx);
+                if (rc != 0) {
+                        GF_FREE (ctx);
+                        return rc;
+                }
+        }
+
+        ctx->uid    = *(uid_t *) uid;
+        ctx->valid |= SYNCOPCTX_UID;
+
+        return 0;
+}
+
+/*
+ * Record the gid pointed to by @gid in the calling thread's syncop context,
+ * creating and registering that context on first use.
+ *
+ * Returns 0 on success.  Returns -1 with errno = EINVAL when @gid is NULL,
+ * -1 when the context cannot be allocated, or the non-zero result of
+ * syncopctx_setctx() when registering a fresh context fails.
+ */
+int
+syncopctx_setfsgid (void *gid)
+{
+        struct syncopctx *ctx = NULL;
+        int               rc  = 0;
+
+        if (gid == NULL) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        ctx = syncopctx_getctx ();
+        if (ctx == NULL) {
+                /* first call on this thread: allocate and register */
+                ctx = GF_CALLOC (1, sizeof (*ctx), gf_common_mt_syncopctx);
+                if (ctx == NULL)
+                        return -1;
+
+                rc = syncopctx_setctx (ctx);
+                if (rc != 0) {
+                        GF_FREE (ctx);
+                        return rc;
+                }
+        }
+
+        ctx->gid    = *(gid_t *) gid;
+        ctx->valid |= SYNCOPCTX_GID;
+
+        return 0;
+}
+
+/*
+ * Store @count supplementary group ids from @groups in the calling thread's
+ * syncop context, creating and registering that context on first use.
+ * A @count of 0 resets the group list (the buffer is kept for reuse).
+ *
+ * Returns 0 on success.  Returns -1 with errno = EINVAL when @count is
+ * negative or when @count is non-zero and @groups is NULL, -1 on allocation
+ * failure, or the non-zero result of syncopctx_setctx() when registering a
+ * fresh context fails.  On failure any previously stored groups are left
+ * untouched.
+ */
+int
+syncopctx_setfsgroups (int count, const void *groups)
+{
+        struct syncopctx *opctx     = NULL;
+        gid_t            *tmpgroups = NULL;
+        int               ret       = 0;
+
+        /* In args check; a negative count would otherwise be converted to
+         * an enormous size in the memcpy below */
+        if (count < 0 || (count != 0 && !groups)) {
+                ret = -1;
+                errno = EINVAL;
+                goto out;
+        }
+
+        opctx = syncopctx_getctx ();
+
+        /* alloc for this thread the first time */
+        if (!opctx) {
+                opctx = GF_CALLOC (1, sizeof (*opctx), gf_common_mt_syncopctx);
+                if (!opctx) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = syncopctx_setctx (opctx);
+                if (ret != 0) {
+                        GF_FREE (opctx);
+                        opctx = NULL;
+                        goto out;
+                }
+        }
+
+        /* resize internal groups as required */
+        if (count && opctx->grpsize < count) {
+                if (opctx->groups) {
+                        tmpgroups = GF_REALLOC (opctx->groups,
+                                                (sizeof (gid_t) * count));
+                        /* NOTE: Not really required to zero the reallocation,
+                         * as ngrps controls the validity of data,
+                         * making a note irrespective */
+                        if (tmpgroups == NULL) {
+                                /* the old array is still valid after a failed
+                                 * realloc; keep groups/grpsize/ngrps as they
+                                 * were so the context stays consistent */
+                                ret = -1;
+                                goto out;
+                        }
+                } else {
+                        tmpgroups = GF_CALLOC (count, sizeof (gid_t),
+                                               gf_common_mt_syncopctx);
+                        if (tmpgroups == NULL) {
+                                opctx->grpsize = 0;
+                                ret = -1;
+                                goto out;
+                        }
+                }
+
+                opctx->groups = tmpgroups;
+                opctx->grpsize = count;
+        }
+
+        /* copy out the groups passed */
+        if (count)
+                memcpy (opctx->groups, groups, (sizeof (gid_t) * count));
+
+        /* set/reset the ngrps, this is where reset of groups is handled */
+        opctx->ngrps = count;
+        opctx->valid |= SYNCOPCTX_GROUPS;
+
+out:
+        return ret;
+}
+
static void
__run (struct synctask *task)
{
@@ -23,27 +177,31 @@ __run (struct synctask *task)
env = task->env;
list_del_init (&task->all_tasks);
- switch (task->state) {
- case SYNCTASK_INIT:
+ switch (task->state) {
+ case SYNCTASK_INIT:
case SYNCTASK_SUSPEND:
- break;
- case SYNCTASK_RUN:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "re-running already running task");
- env->runcount--;
- break;
- case SYNCTASK_WAIT:
- env->waitcount--;
- break;
- case SYNCTASK_DONE:
+ break;
+ case SYNCTASK_RUN:
+ gf_log (task->xl->name, GF_LOG_DEBUG,
+ "re-running already running task");
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ env->waitcount--;
+ break;
+ case SYNCTASK_DONE:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
gf_log (task->xl->name, GF_LOG_WARNING,
- "running completed task");
- break;
- }
+ "attempted to wake up zombie!!");
+ return;
+ }
list_add_tail (&task->all_tasks, &env->runq);
- env->runcount++;
- task->state = SYNCTASK_RUN;
+ env->runcount++;
+ task->state = SYNCTASK_RUN;
}
@@ -55,37 +213,52 @@ __wait (struct synctask *task)
env = task->env;
list_del_init (&task->all_tasks);
- switch (task->state) {
- case SYNCTASK_INIT:
+ switch (task->state) {
+ case SYNCTASK_INIT:
case SYNCTASK_SUSPEND:
- break;
- case SYNCTASK_RUN:
- env->runcount--;
- break;
- case SYNCTASK_WAIT:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "re-waiting already waiting task");
- env->waitcount--;
- break;
- case SYNCTASK_DONE:
+ break;
+ case SYNCTASK_RUN:
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "re-waiting already waiting task");
+ env->waitcount--;
+ break;
+ case SYNCTASK_DONE:
+ gf_log (task->xl->name, GF_LOG_WARNING,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
gf_log (task->xl->name, GF_LOG_WARNING,
- "running completed task");
- break;
- }
+ "attempted to sleep a zombie!!");
+ return;
+ }
list_add_tail (&task->all_tasks, &env->waitq);
- env->waitcount++;
- task->state = SYNCTASK_WAIT;
+ env->waitcount++;
+ task->state = SYNCTASK_WAIT;
}
+/*
+ * Suspend @task: save its execution context in task->ctx and switch to the
+ * scheduler context of the processor it is running on (task->proc->sched).
+ * THIS is captured on entry and restored after swapcontext() returns, i.e.
+ * once the task has been switched back in.
+ */
void
synctask_yield (struct synctask *task)
{
+ xlator_t *oldTHIS = THIS;
+
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontext() */
+ task->proc->sched.uc_flags &= ~_UC_TLSBASE;
+#endif
+
+ /* a task that has already finished keeps SYNCTASK_DONE */
+ if (task->state != SYNCTASK_DONE)
+ task->state = SYNCTASK_SUSPEND;
if (swapcontext (&task->ctx, &task->proc->sched) < 0) {
gf_log ("syncop", GF_LOG_ERROR,
"swapcontext failed (%s)", strerror (errno));
}
+
+ THIS = oldTHIS;
}
@@ -102,10 +275,10 @@ synctask_wake (struct synctask *task)
if (task->slept)
__run (task);
+
+ pthread_cond_broadcast (&env->cond);
}
pthread_mutex_unlock (&env->mutex);
-
- pthread_cond_broadcast (&env->cond);
}
void
@@ -118,8 +291,8 @@ synctask_wrap (struct synctask *old_task)
task = synctask_get ();
task->ret = task->syncfn (task->opaque);
- if (task->synccbk)
- task->synccbk (task->ret, task->frame, task->opaque);
+ if (task->synccbk)
+ task->synccbk (task->ret, task->frame, task->opaque);
task->state = SYNCTASK_DONE;
@@ -133,15 +306,15 @@ synctask_destroy (struct synctask *task)
if (!task)
return;
- if (task->stack)
- FREE (task->stack);
+ FREE (task->stack);
if (task->opframe)
STACK_DESTROY (task->opframe->root);
- pthread_mutex_destroy (&task->mutex);
-
- pthread_cond_destroy (&task->cond);
+ if (task->synccbk == NULL) {
+ pthread_mutex_destroy (&task->mutex);
+ pthread_cond_destroy (&task->cond);
+ }
FREE (task);
}
@@ -150,34 +323,50 @@ synctask_destroy (struct synctask *task)
void
synctask_done (struct synctask *task)
{
- if (task->synccbk) {
- synctask_destroy (task);
- return;
- }
+ if (task->synccbk) {
+ synctask_destroy (task);
+ return;
+ }
- pthread_mutex_lock (&task->mutex);
- {
- task->done = 1;
- pthread_cond_broadcast (&task->cond);
- }
- pthread_mutex_unlock (&task->mutex);
+ pthread_mutex_lock (&task->mutex);
+ {
+ task->state = SYNCTASK_ZOMBIE;
+ task->done = 1;
+ pthread_cond_broadcast (&task->cond);
+ }
+ pthread_mutex_unlock (&task->mutex);
}
int
-synctask_new (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
- call_frame_t *frame, void *opaque)
+synctask_setid (struct synctask *task, uid_t uid, gid_t gid)
+{
+        /*
+         * Record @uid and/or @gid on @task.  Passing (uid_t) -1 or
+         * (gid_t) -1 leaves the corresponding field unchanged.
+         * Returns 0 on success, -1 when @task is NULL.
+         *
+         * The sentinel is cast to the id type so the comparison stays
+         * correct even where uid_t/gid_t are narrower than int.
+         */
+        if (!task)
+                return -1;
+
+        if (uid != (uid_t) -1)
+                task->uid = uid;
+
+        if (gid != (gid_t) -1)
+                task->gid = gid;
+
+        return 0;
+}
+
+
+struct synctask *
+synctask_create (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
+ call_frame_t *frame, void *opaque)
{
struct synctask *newtask = NULL;
xlator_t *this = THIS;
- int ret = 0;
VALIDATE_OR_GOTO (env, err);
VALIDATE_OR_GOTO (fn, err);
newtask = CALLOC (1, sizeof (*newtask));
if (!newtask)
- return -ENOMEM;
+ return NULL;
newtask->frame = frame;
if (!frame) {
@@ -190,10 +379,15 @@ synctask_new (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
newtask->env = env;
newtask->xl = this;
newtask->syncfn = fn;
- newtask->synccbk = cbk;
+ newtask->synccbk = cbk;
newtask->opaque = opaque;
+ /* default to the uid/gid of the passed frame */
+ newtask->uid = newtask->opframe->root->uid;
+ newtask->gid = newtask->opframe->root->gid;
+
INIT_LIST_HEAD (&newtask->all_tasks);
+ INIT_LIST_HEAD (&newtask->waitq);
if (getcontext (&newtask->ctx) < 0) {
gf_log ("syncop", GF_LOG_ERROR,
@@ -212,67 +406,114 @@ synctask_new (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
newtask->ctx.uc_stack.ss_sp = newtask->stack;
newtask->ctx.uc_stack.ss_size = env->stacksize;
- makecontext (&newtask->ctx, (void *) synctask_wrap, 2, newtask);
+ makecontext (&newtask->ctx, (void (*)(void)) synctask_wrap, 2, newtask);
- newtask->state = SYNCTASK_INIT;
+ newtask->state = SYNCTASK_INIT;
newtask->slept = 1;
- if (!cbk) {
- pthread_mutex_init (&newtask->mutex, NULL);
- pthread_cond_init (&newtask->cond, NULL);
- newtask->done = 0;
- }
+ if (!cbk) {
+ pthread_mutex_init (&newtask->mutex, NULL);
+ pthread_cond_init (&newtask->cond, NULL);
+ newtask->done = 0;
+ }
synctask_wake (newtask);
+ /*
+ * Make sure someone's there to execute anything we just put on the
+ * run queue.
+ */
+ syncenv_scale(env);
- if (!cbk) {
- pthread_mutex_lock (&newtask->mutex);
- {
- while (!newtask->done) {
- pthread_cond_wait (&newtask->cond, &newtask->mutex);
- }
- }
- pthread_mutex_unlock (&newtask->mutex);
-
- ret = newtask->ret;
-
- synctask_destroy (newtask);
- }
-
- return ret;
+ return newtask;
err:
if (newtask) {
- if (newtask->stack)
- FREE (newtask->stack);
+ FREE (newtask->stack);
if (newtask->opframe)
STACK_DESTROY (newtask->opframe->root);
FREE (newtask);
}
- return -1;
+
+ return NULL;
+}
+
+
+/*
+ * Block the calling thread until @task has set task->done, then return the
+ * task's result (task->ret) and destroy the task.  @task must not be used
+ * after this call returns.
+ * NOTE(review): task->mutex/task->cond appear to be initialised only for
+ * tasks created without a callback; confirm callers never join a task that
+ * has one.
+ */
+int
+synctask_join (struct synctask *task)
+{
+ int ret = 0;
+
+ pthread_mutex_lock (&task->mutex);
+ {
+ /* loop guards against wakeups that arrive before done is set */
+ while (!task->done)
+ pthread_cond_wait (&task->cond, &task->mutex);
+ }
+ pthread_mutex_unlock (&task->mutex);
+
+ /* read the result before the task memory is released */
+ ret = task->ret;
+
+ synctask_destroy (task);
+
+ return ret;
+}
+
+
+/*
+ * Create a synctask via synctask_create().  Without a callback (@cbk NULL)
+ * the call waits for the task and returns what synctask_join() yields; with
+ * a callback it returns 0 right after creation.  Returns -1 when the task
+ * could not be created.
+ */
+int
+synctask_new (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
+              call_frame_t *frame, void *opaque)
+{
+        struct synctask *task = synctask_create (env, fn, cbk, frame, opaque);
+
+        if (task == NULL)
+                return -1;
+
+        /* asynchronous caller: completion is reported through cbk */
+        if (cbk != NULL)
+                return 0;
+
+        return synctask_join (task);
+}
struct synctask *
syncenv_task (struct syncproc *proc)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
struct synctask *task = NULL;
+ struct timespec sleep_till = {0, };
+ int ret = 0;
- env = proc->env;
+ env = proc->env;
pthread_mutex_lock (&env->mutex);
{
- while (list_empty (&env->runq))
- pthread_cond_wait (&env->cond, &env->mutex);
+ while (list_empty (&env->runq)) {
+ sleep_till.tv_sec = time (NULL) + SYNCPROC_IDLE_TIME;
+ ret = pthread_cond_timedwait (&env->cond, &env->mutex,
+ &sleep_till);
+ if (!list_empty (&env->runq))
+ break;
+ if ((ret == ETIMEDOUT) &&
+ (env->procs > env->procmin)) {
+ task = NULL;
+ env->procs--;
+ memset (proc, 0, sizeof (*proc));
+ goto unlock;
+ }
+ }
task = list_entry (env->runq.next, struct synctask, all_tasks);
list_del_init (&task->all_tasks);
- env->runcount--;
+ env->runcount--;
+
+ task->woken = 0;
+ task->slept = 0;
- task->proc = proc;
+ task->proc = proc;
}
+unlock:
pthread_mutex_unlock (&env->mutex);
return task;
@@ -289,8 +530,10 @@ synctask_switchto (struct synctask *task)
synctask_set (task);
THIS = task->xl;
- task->woken = 0;
- task->slept = 0;
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontext() */
+ task->ctx.uc_flags &= ~_UC_TLSBASE;
+#endif
if (swapcontext (&task->proc->sched, &task->ctx) < 0) {
gf_log ("syncop", GF_LOG_ERROR,
@@ -326,10 +569,12 @@ syncenv_processor (void *thdata)
for (;;) {
task = syncenv_task (proc);
+ if (!task)
+ break;
synctask_switchto (task);
- syncenv_scale (env);
+ syncenv_scale (env);
}
return NULL;
@@ -339,27 +584,39 @@ syncenv_processor (void *thdata)
void
syncenv_scale (struct syncenv *env)
{
- int thmax = 0;
- int i = 0;
- int ret = 0;
+ int diff = 0;
+ int scale = 0;
+ int i = 0;
+ int ret = 0;
- pthread_mutex_lock (&env->mutex);
- {
- if (env->procs > env->runcount)
- goto unlock;
-
- thmax = min (env->runcount, SYNCENV_PROC_MAX);
- for (i = env->procs; i < thmax; i++) {
- env->proc[i].env = env;
- ret = pthread_create (&env->proc[i].processor, NULL,
- syncenv_processor, &env->proc[i]);
- if (ret)
- break;
- env->procs++;
- }
- }
+ pthread_mutex_lock (&env->mutex);
+ {
+ if (env->procs > env->runcount)
+ goto unlock;
+
+ scale = env->runcount;
+ if (scale > env->procmax)
+ scale = env->procmax;
+ if (scale > env->procs)
+ diff = scale - env->procs;
+ while (diff) {
+ diff--;
+ for (; (i < env->procmax); i++) {
+ if (env->proc[i].processor == 0)
+ break;
+ }
+
+ env->proc[i].env = env;
+ ret = gf_thread_create (&env->proc[i].processor, NULL,
+ syncenv_processor, &env->proc[i]);
+ if (ret)
+ break;
+ env->procs++;
+ i++;
+ }
+ }
unlock:
- pthread_mutex_unlock (&env->mutex);
+ pthread_mutex_unlock (&env->mutex);
}
@@ -371,12 +628,20 @@ syncenv_destroy (struct syncenv *env)
struct syncenv *
-syncenv_new (size_t stacksize)
+syncenv_new (size_t stacksize, int procmin, int procmax)
{
struct syncenv *newenv = NULL;
int ret = 0;
int i = 0;
+ if (!procmin || procmin < 0)
+ procmin = SYNCENV_PROC_MIN;
+ if (!procmax || procmax > SYNCENV_PROC_MAX)
+ procmax = SYNCENV_PROC_MAX;
+
+ if (procmin > procmax)
+ return NULL;
+
newenv = CALLOC (1, sizeof (*newenv));
if (!newenv)
@@ -391,11 +656,13 @@ syncenv_new (size_t stacksize)
newenv->stacksize = SYNCENV_DEFAULT_STACKSIZE;
if (stacksize)
newenv->stacksize = stacksize;
+ newenv->procmin = procmin;
+ newenv->procmax = procmax;
- for (i = 0; i < SYNCENV_PROC_MIN; i++) {
+ for (i = 0; i < newenv->procmin; i++) {
newenv->proc[i].env = newenv;
- ret = pthread_create (&newenv->proc[i].processor, NULL,
- syncenv_processor, &newenv->proc[i]);
+ ret = gf_thread_create (&newenv->proc[i].processor, NULL,
+ syncenv_processor, &newenv->proc[i]);
if (ret)
break;
newenv->procs++;
@@ -408,6 +675,268 @@ syncenv_new (size_t stacksize)
}
+/*
+ * Initialise @lock to the unlocked state with no owner and an empty wait
+ * queue.
+ *
+ * Returns 0 on success, -1 when @lock is NULL, otherwise the error number
+ * returned by pthread_cond_init() or pthread_mutex_init().  Nothing is left
+ * initialised on failure.
+ */
+int
+synclock_init (synclock_t *lock)
+{
+        int ret = 0;
+
+        if (!lock)
+                return -1;
+
+        ret = pthread_cond_init (&lock->cond, 0);
+        if (ret)
+                return ret;
+
+        lock->lock = 0;
+        lock->owner = NULL;
+        INIT_LIST_HEAD (&lock->waitq);
+
+        ret = pthread_mutex_init (&lock->guard, 0);
+        if (ret)
+                /* do not leak the condition variable set up above */
+                pthread_cond_destroy (&lock->cond);
+
+        return ret;
+}
+
+
+/*
+ * Tear down the condition variable and guard mutex of @lock.
+ * Returns -1 when @lock is NULL, otherwise the result of
+ * pthread_mutex_destroy().
+ */
+int
+synclock_destroy (synclock_t *lock)
+{
+        int rc = -1;
+
+        if (lock != NULL) {
+                pthread_cond_destroy (&lock->cond);
+                rc = pthread_mutex_destroy (&lock->guard);
+        }
+
+        return rc;
+}
+
+
+/*
+ * Take @lock.  Called with lock->guard held (see synclock_lock and
+ * synclock_trylock).  While the lock is held by someone else, a caller
+ * running inside a synctask queues itself on lock->waitq and yields, dropping
+ * the guard around the yield; any other caller blocks on lock->cond.
+ * Returns 0 once the lock is taken, -1 when @lock is NULL.
+ */
+static int
+__synclock_lock (struct synclock *lock)
+{
+ struct synctask *task = NULL;
+
+ if (!lock)
+ return -1;
+
+ /* NULL when the caller is not running inside a synctask */
+ task = synctask_get ();
+
+ while (lock->lock) {
+ if (task) {
+ /* called within a synctask */
+ list_add_tail (&task->waitq, &lock->waitq);
+ pthread_mutex_unlock (&lock->guard);
+ synctask_yield (task);
+ /* task is removed from waitq in unlock,
+ * under lock->guard.*/
+ pthread_mutex_lock (&lock->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait (&lock->cond, &lock->guard);
+ }
+ }
+
+ lock->lock = _gf_true;
+ /* owner stays NULL when a plain thread takes the lock */
+ lock->owner = task;
+
+ return 0;
+}
+
+
+/*
+ * Acquire @lock, waiting (yielding inside a synctask, blocking otherwise)
+ * until it becomes available.
+ * Returns 0 on success, -1 with errno = EINVAL when @lock is NULL.
+ */
+int
+synclock_lock (synclock_t *lock)
+{
+        int ret = 0;
+
+        /* must be checked here: locking lock->guard below would already
+         * dereference a NULL lock before the helper's own check runs */
+        if (!lock) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        pthread_mutex_lock (&lock->guard);
+        {
+                ret = __synclock_lock (lock);
+        }
+        pthread_mutex_unlock (&lock->guard);
+
+        return ret;
+}
+
+
+/*
+ * Acquire @lock only if it is currently free.
+ * Returns 0 when the lock was taken, -1 with errno = EBUSY when it is
+ * already held, and -1 with errno = EINVAL when @lock is NULL.
+ */
+int
+synclock_trylock (synclock_t *lock)
+{
+        int ret = 0;
+
+        /* must be checked here: locking lock->guard below would already
+         * dereference a NULL lock */
+        if (!lock) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        errno = 0;
+
+        pthread_mutex_lock (&lock->guard);
+        {
+                if (lock->lock) {
+                        errno = EBUSY;
+                        ret = -1;
+                        goto unlock;
+                }
+
+                /* lock is free, so this cannot wait */
+                ret = __synclock_lock (lock);
+        }
+unlock:
+        pthread_mutex_unlock (&lock->guard);
+
+        return ret;
+}
+
+
+/*
+ * Release @lock and wake waiters.  Called with lock->guard held (see
+ * synclock_unlock).  Signals one thread blocked on lock->cond and wakes the
+ * first synctask queued on lock->waitq, if any.
+ * Returns 0, or -1 when @lock is NULL.
+ */
+static int
+__synclock_unlock (synclock_t *lock)
+{
+ struct synctask *task = NULL;
+ struct synctask *curr = NULL;
+
+ if (!lock)
+ return -1;
+
+ curr = synctask_get ();
+
+ /* ownership mismatch is detected but currently not acted upon */
+ if (lock->owner != curr) {
+ /* warn ? */
+ }
+
+ lock->lock = _gf_false;
+
+ /* There could be both synctasks and non synctasks
+ waiting (or none, or either). As a mid-approach
+ between maintaining too many waiting counters
+ at one extreme and a thundering herd on unlock
+ at the other, call a cond_signal (which wakes
+ one waiter) and first synctask waiter. So at
+ most we have two threads waking up to grab the
+ just released lock.
+ */
+ pthread_cond_signal (&lock->cond);
+ if (!list_empty (&lock->waitq)) {
+ task = list_entry (lock->waitq.next, struct synctask, waitq);
+ list_del_init (&task->waitq);
+ synctask_wake (task);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Release @lock and wake up to one blocked thread plus the first waiting
+ * synctask.
+ * Returns 0 on success, -1 with errno = EINVAL when @lock is NULL.
+ */
+int
+synclock_unlock (synclock_t *lock)
+{
+        int ret = 0;
+
+        /* must be checked here: locking lock->guard below would already
+         * dereference a NULL lock before the helper's own check runs */
+        if (!lock) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        pthread_mutex_lock (&lock->guard);
+        {
+                ret = __synclock_unlock (lock);
+        }
+        pthread_mutex_unlock (&lock->guard);
+
+        return ret;
+}
+
+/* Barriers */
+
+/*
+ * Initialise @barrier with a wake count of 0 and an empty wait queue.
+ *
+ * Returns 0 on success, -1 with errno = EINVAL when @barrier is NULL,
+ * otherwise the error number returned by pthread_cond_init() or
+ * pthread_mutex_init().  Nothing is left initialised on failure.
+ */
+int
+syncbarrier_init (struct syncbarrier *barrier)
+{
+        int ret = 0;
+
+        if (!barrier) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        ret = pthread_cond_init (&barrier->cond, 0);
+        if (ret)
+                return ret;
+
+        barrier->count = 0;
+        INIT_LIST_HEAD (&barrier->waitq);
+
+        ret = pthread_mutex_init (&barrier->guard, 0);
+        if (ret)
+                /* do not leak the condition variable set up above */
+                pthread_cond_destroy (&barrier->cond);
+
+        return ret;
+}
+
+
+/*
+ * Tear down the condition variable and guard mutex of @barrier.
+ * Returns -1 with errno = EINVAL when @barrier is NULL, otherwise the result
+ * of pthread_mutex_destroy().
+ */
+int
+syncbarrier_destroy (struct syncbarrier *barrier)
+{
+        if (barrier == NULL) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        pthread_cond_destroy (&barrier->cond);
+
+        return pthread_mutex_destroy (&barrier->guard);
+}
+
+
+/*
+ * Wait until barrier->count has reached @waitfor, then reset the count to 0.
+ * Called with barrier->guard held (see syncbarrier_wait).  A caller running
+ * inside a synctask queues itself on barrier->waitq and yields, dropping the
+ * guard around the yield; any other caller blocks on barrier->cond.
+ * Returns 0, or -1 with errno = EINVAL when @barrier is NULL.
+ */
+static int
+__syncbarrier_wait (struct syncbarrier *barrier, int waitfor)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* NULL when the caller is not running inside a synctask */
+ task = synctask_get ();
+
+ while (barrier->count < waitfor) {
+ if (task) {
+ /* called within a synctask */
+ list_add_tail (&task->waitq, &barrier->waitq);
+ pthread_mutex_unlock (&barrier->guard);
+ synctask_yield (task);
+ pthread_mutex_lock (&barrier->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait (&barrier->cond, &barrier->guard);
+ }
+ }
+
+ /* consume the wakes so the barrier can be waited on again */
+ barrier->count = 0;
+
+ return 0;
+}
+
+
+/*
+ * Wait until @barrier has been woken @waitfor times (see syncbarrier_wake).
+ * Returns 0 on success, -1 with errno = EINVAL when @barrier is NULL.
+ */
+int
+syncbarrier_wait (struct syncbarrier *barrier, int waitfor)
+{
+        int ret = 0;
+
+        /* must be checked here: locking barrier->guard below would already
+         * dereference a NULL barrier before the helper's own check runs */
+        if (!barrier) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        pthread_mutex_lock (&barrier->guard);
+        {
+                ret = __syncbarrier_wait (barrier, waitfor);
+        }
+        pthread_mutex_unlock (&barrier->guard);
+
+        return ret;
+}
+
+
+/*
+ * Register one wake on @barrier: increment barrier->count, signal one thread
+ * blocked on barrier->cond and wake the first synctask queued on
+ * barrier->waitq, if any.  Called with barrier->guard held (see
+ * syncbarrier_wake).
+ * Returns 0, or -1 with errno = EINVAL when @barrier is NULL.
+ */
+static int
+__syncbarrier_wake (struct syncbarrier *barrier)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ barrier->count++;
+
+ pthread_cond_signal (&barrier->cond);
+ if (!list_empty (&barrier->waitq)) {
+ task = list_entry (barrier->waitq.next, struct synctask, waitq);
+ list_del_init (&task->waitq);
+ synctask_wake (task);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Register one wake on @barrier and nudge its waiters.
+ * Returns 0 on success, -1 with errno = EINVAL when @barrier is NULL.
+ */
+int
+syncbarrier_wake (struct syncbarrier *barrier)
+{
+        int ret = 0;
+
+        /* must be checked here: locking barrier->guard below would already
+         * dereference a NULL barrier before the helper's own check runs */
+        if (!barrier) {
+                errno = EINVAL;
+                return -1;
+        }
+
+        pthread_mutex_lock (&barrier->guard);
+        {
+                ret = __syncbarrier_wake (barrier);
+        }
+        pthread_mutex_unlock (&barrier->guard);
+
+        return ret;
+}
+
+
/* FOPS */
@@ -470,6 +999,8 @@ entry_copy (gf_dirent_t *source)
sink->d_type = source->d_type;
sink->d_stat = source->d_stat;
+ if (source->inode)
+ sink->inode = inode_ref (source->inode);
return sink;
}
@@ -628,6 +1159,34 @@ syncop_opendir (xlator_t *subvol,
}
+/*
+ * fsyncdir completion: store the result in the syncargs passed as @cookie
+ * and wake whoever is waiting on it.
+ */
int
+syncop_fsyncdir_cbk (call_frame_t *frame, void* cookie, xlator_t *this,
+                     int op_ret, int op_errno, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+/*
+ * Synchronous fsyncdir on @fd through @subvol.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_fsyncdir (xlator_t *subvol, fd_t *fd, int datasync)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_fsyncdir_cbk, subvol->fops->fsyncdir,
+                fd, datasync, NULL);
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+int
syncop_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
@@ -921,9 +1480,6 @@ syncop_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
args->op_ret = op_ret;
args->op_errno = op_errno;
- if (op_ret != -1)
- fd_ref (fd);
-
__wake (args);
return 0;
@@ -981,9 +1537,12 @@ syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
SYNCOP (subvol, (&args), syncop_readv_cbk, subvol->fops->readv,
fd, size, off, flags, NULL);
+ if (args.op_ret < 0)
+ goto out;
+
if (vector)
*vector = args.vector;
- else if (args.vector)
+ else
GF_FREE (args.vector);
if (count)
@@ -995,6 +1554,7 @@ syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
else if (args.iobref)
iobref_unref (args.iobref);
+out:
errno = args.op_errno;
return args.op_ret;
@@ -1018,14 +1578,15 @@ syncop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-syncop_writev (xlator_t *subvol, fd_t *fd, struct iovec *vector,
+syncop_writev (xlator_t *subvol, fd_t *fd, const struct iovec *vector,
int32_t count, off_t offset, struct iobref *iobref,
uint32_t flags)
{
struct syncargs args = {0, };
SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev,
- fd, vector, count, offset, flags, iobref, NULL);
+ fd, (struct iovec *) vector, count, offset, flags, iobref,
+ NULL);
errno = args.op_errno;
return args.op_ret;
@@ -1069,8 +1630,8 @@ syncop_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
args->op_ret = op_ret;
args->op_errno = op_errno;
- if (op_ret != -1)
- fd_ref (fd);
+ if (buf)
+ args->iatt1 = *buf;
__wake (args);
@@ -1079,7 +1640,7 @@ syncop_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *xdata)
+ fd_t *fd, dict_t *xdata, struct iatt *iatt)
{
struct syncargs args = {0, };
@@ -1087,6 +1648,9 @@ syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
loc, flags, mode, 0, fd, xdata);
errno = args.op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
+
return args.op_ret;
}
@@ -1121,9 +1685,39 @@ syncop_unlink (xlator_t *subvol, loc_t *loc)
}
+/*
+ * rmdir completion: store the result in the syncargs passed as @cookie and
+ * wake whoever is waiting on it.  The parent iatts are not recorded.
+ */
int
+syncop_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int op_ret, int op_errno, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+/*
+ * Synchronous rmdir of @loc through @subvol; the remaining fop arguments are
+ * passed as 0 and NULL.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_rmdir (xlator_t *subvol, loc_t *loc)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_rmdir_cbk, subvol->fops->rmdir, loc,
+                0, NULL);
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+
+int
syncop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
struct syncargs *args = NULL;
@@ -1152,6 +1746,41 @@ syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc)
return args.op_ret;
}
+
+/*
+ * rename completion: store the result in the syncargs passed as @cookie and
+ * wake whoever is waiting on it.  None of the iatts are recorded.
+ */
+int
+syncop_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, struct iatt *buf,
+                   struct iatt *preoldparent, struct iatt *postoldparent,
+                   struct iatt *prenewparent, struct iatt *postnewparent,
+                   dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+
+/*
+ * Synchronous rename of @oldloc to @newloc through @subvol.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_rename_cbk, subvol->fops->rename,
+                oldloc, newloc, NULL);
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+
int
syncop_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, struct iatt *prebuf,
@@ -1212,12 +1841,43 @@ syncop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-syncop_fsync (xlator_t *subvol, fd_t *fd)
+syncop_fsync (xlator_t *subvol, fd_t *fd, int dataonly)
{
struct syncargs args = {0, };
SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync,
- fd, 0, NULL);
+ fd, dataonly, NULL);
+
+ errno = args.op_errno;
+ return args.op_ret;
+
+}
+
+
+/*
+ * flush completion: store the result in the syncargs passed as @cookie and
+ * wake whoever is waiting on it.
+ */
+int
+syncop_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+int
+syncop_flush (xlator_t *subvol, fd_t *fd)
+{
+ struct syncargs args = {0};
+
+ SYNCOP (subvol, (&args), syncop_flush_cbk, subvol->fops->flush,
+ fd, NULL);
errno = args.op_errno;
return args.op_ret;
@@ -1287,6 +1947,8 @@ syncop_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
args->op_ret = op_ret;
args->op_errno = op_errno;
+ if (buf)
+ args->iatt1 = *buf;
__wake (args);
@@ -1294,7 +1956,8 @@ syncop_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
int
-syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict)
+syncop_symlink (xlator_t *subvol, loc_t *loc, const char *newpath, dict_t *dict,
+ struct iatt *iatt)
{
struct syncargs args = {0, };
@@ -1302,6 +1965,9 @@ syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict)
newpath, loc, 0, dict);
errno = args.op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
+
return args.op_ret;
}
@@ -1336,8 +2002,7 @@ syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size)
if (buffer)
*buffer = args.buffer;
- else if (args.buffer)
- GF_FREE (args.buffer);
+ else GF_FREE (args.buffer);
errno = args.op_errno;
return args.op_ret;
@@ -1356,6 +2021,9 @@ syncop_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
args->op_ret = op_ret;
args->op_errno = op_errno;
+ if (buf)
+ args->iatt1 = *buf;
+
__wake (args);
return 0;
@@ -1363,7 +2031,7 @@ syncop_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int
syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
- dict_t *dict)
+ dict_t *dict, struct iatt *iatt)
{
struct syncargs args = {0, };
@@ -1371,6 +2039,199 @@ syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
loc, mode, rdev, 0, dict);
errno = args.op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
+
return args.op_ret;
}
+
+
+/*
+ * mkdir completion: store the result (and, when supplied, the new
+ * directory's iatt) in the syncargs passed as @cookie, then wake whoever is
+ * waiting on it.
+ */
+int
+syncop_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, inode_t *inode,
+                  struct iatt *buf, struct iatt *preparent,
+                  struct iatt *postparent, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        if (buf != NULL)
+                stub->iatt1 = *buf;
+
+        __wake (stub);
+
+        return 0;
+}
+
+
+/*
+ * Synchronous mkdir of @loc with @mode through @subvol.  When @iatt is
+ * non-NULL it receives the iatt captured by the callback.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_mkdir (xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *dict,
+              struct iatt *iatt)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_mkdir_cbk, subvol->fops->mkdir,
+                loc, mode, 0, dict);
+
+        if (iatt != NULL)
+                *iatt = stub.iatt1;
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+/*
+ * access completion: store the result in the syncargs passed as @cookie and
+ * wake whoever is waiting on it.
+ */
+int
+syncop_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+/*
+ * Synchronous access check of @loc with @mask through @subvol.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_access (xlator_t *subvol, loc_t *loc, int32_t mask)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_access_cbk, subvol->fops->access,
+                loc, mask, NULL);
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+
+/*
+ * fallocate completion: store the result in the syncargs passed as @cookie
+ * and wake whoever is waiting on it.  The pre/post iatts are not recorded.
+ */
+int
+syncop_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                      int op_ret, int op_errno, struct iatt *prebuf,
+                      struct iatt *postbuf, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+/*
+ * Synchronous fallocate on @fd through @subvol.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+                 size_t len)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_fallocate_cbk, subvol->fops->fallocate,
+                fd, keep_size, offset, len, NULL);
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+
+/*
+ * discard completion: store the result in the syncargs passed as @cookie and
+ * wake whoever is waiting on it.  The pre/post iatts are not recorded.
+ */
+int
+syncop_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                    int op_ret, int op_errno, struct iatt *prebuf,
+                    struct iatt *postbuf, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+/*
+ * Synchronous discard of @len bytes at @offset on @fd through @subvol.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_discard_cbk, subvol->fops->discard,
+                fd, offset, len, NULL);
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+/*
+ * zerofill completion: store the result in the syncargs passed as @cookie
+ * and wake whoever is waiting on it.  The pre/post iatts are not recorded.
+ */
+int
+syncop_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int op_ret, int op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        __wake (stub);
+
+        return 0;
+}
+
+/*
+ * Synchronous zerofill of @len bytes at @offset on @fd through @subvol.
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, size_t len)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_zerofill_cbk, subvol->fops->zerofill,
+                fd, offset, len, NULL);
+
+        errno = stub.op_errno;
+
+        return stub.op_ret;
+}
+
+
+/*
+ * lk completion: store the result (and, when supplied, the returned lock
+ * description) in the syncargs passed as @cookie, then wake whoever is
+ * waiting on it.
+ */
+int
+syncop_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+               int op_ret, int op_errno, struct gf_flock *flock,
+               dict_t *xdata)
+{
+        struct syncargs *stub = cookie;
+
+        stub->op_errno = op_errno;
+        stub->op_ret   = op_ret;
+
+        if (flock != NULL)
+                stub->flock = *flock;
+
+        __wake (stub);
+
+        return 0;
+}
+
+
+/*
+ * Synchronous lk request @cmd on @fd through @subvol.  @flock is both the
+ * request and the result: it is always overwritten with the flock captured
+ * by the callback (all zeroes if the callback supplied none).
+ * Returns the fop's op_ret; errno is set to its op_errno.
+ */
+int
+syncop_lk (xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock)
+{
+        struct syncargs stub = {0, };
+
+        SYNCOP (subvol, (&stub), syncop_lk_cbk, subvol->fops->lk,
+                fd, cmd, flock, NULL);
+
+        *flock = stub.flock;
+        errno  = stub.op_errno;
+
+        return stub.op_ret;
+}
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
index 726e8c49a..f790981f0 100644
--- a/libglusterfs/src/syncop.h
+++ b/libglusterfs/src/syncop.h
@@ -23,6 +23,14 @@
#define SYNCENV_PROC_MAX 16
#define SYNCENV_PROC_MIN 2
+#define SYNCPROC_IDLE_TIME 600
+
+/*
+ * Flags for syncopctx valid elements
+ */
+#define SYNCOPCTX_UID 0x00000001
+#define SYNCOPCTX_GID 0x00000002
+#define SYNCOPCTX_GROUPS 0x00000004
struct synctask;
struct syncproc;
@@ -35,11 +43,12 @@ typedef int (*synctask_fn_t) (void *opaque);
typedef enum {
- SYNCTASK_INIT = 0,
- SYNCTASK_RUN,
+ SYNCTASK_INIT = 0,
+ SYNCTASK_RUN,
SYNCTASK_SUSPEND,
- SYNCTASK_WAIT,
- SYNCTASK_DONE,
+ SYNCTASK_WAIT,
+ SYNCTASK_DONE,
+ SYNCTASK_ZOMBIE,
} synctask_state_t;
/* for one sequential execution of @syncfn */
@@ -51,19 +60,24 @@ struct synctask {
call_frame_t *opframe;
synctask_cbk_t synccbk;
synctask_fn_t syncfn;
- synctask_state_t state;
+ synctask_state_t state;
void *opaque;
void *stack;
int woken;
int slept;
- int ret;
+ int ret;
+
+ uid_t uid;
+ gid_t gid;
ucontext_t ctx;
- struct syncproc *proc;
+ struct syncproc *proc;
- pthread_mutex_t mutex; /* for synchronous spawning of synctask */
- pthread_cond_t cond;
- int done;
+ pthread_mutex_t mutex; /* for synchronous spawning of synctask */
+ pthread_cond_t cond;
+ int done;
+
+ struct list_head waitq; /* can wait only "once" at a time */
};
@@ -84,6 +98,9 @@ struct syncenv {
struct list_head waitq;
int waitcount;
+ int procmin;
+ int procmax;
+
pthread_mutex_t mutex;
pthread_cond_t cond;
@@ -91,6 +108,25 @@ struct syncenv {
};
+/*
+ * Lock that can be taken both by synctasks and by plain threads: a synctask
+ * that has to wait is queued on @waitq and yields, a plain thread blocks on
+ * @cond.
+ */
+struct synclock {
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ gf_boolean_t lock; /* _gf_true or _gf_false, lock status */
+ struct synctask *owner; /* NULL if current owner is not a synctask */
+};
+typedef struct synclock synclock_t;
+
+
+/*
+ * Counting barrier usable from synctasks and plain threads: a waiter
+ * proceeds once @count has reached the number of wakes it asked for.
+ */
+struct syncbarrier {
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ int count; /* count the number of wakes */
+};
+typedef struct syncbarrier syncbarrier_t;
+
+
struct syncargs {
int op_ret;
int op_errno;
@@ -104,42 +140,196 @@ struct syncargs {
struct iobref *iobref;
char *buffer;
dict_t *xdata;
+ struct gf_flock flock;
/* some more _cbk needs */
uuid_t uuid;
char *errstr;
dict_t *dict;
+ pthread_mutex_t lock_dict;
+
+ syncbarrier_t barrier;
/* do not touch */
struct synctask *task;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int done;
+};
+
+/*
+ * Credential overrides consulted by syncop_create_frame(): @uid, @gid and
+ * @groups/@ngrps are copied into a newly created frame only when the
+ * matching SYNCOPCTX_UID / SYNCOPCTX_GID / SYNCOPCTX_GROUPS bit is set in
+ * @valid; otherwise the process' effective ids and groups are used.
+ * NOTE(review): @grpsize is presumably the allocated capacity of @groups --
+ * confirm against syncop.c.
+ */
+struct syncopctx {
+ unsigned int valid; /* valid flags for elements that are set */
+ uid_t uid;
+ gid_t gid;
+ int grpsize;
+ int ngrps;
+ gid_t *groups;
};
-#define __wake(args) synctask_wake(args->task)
+/*
+ * Prepare @args (pointer to a struct syncargs) for a later __yield() /
+ * __wake() pair.  When the caller runs inside a synctask, record it in
+ * (args)->task and stop; otherwise initialize the mutex/cond/done triple
+ * used by the plain-pthread fallback.
+ *
+ * @args is parenthesized at every use so that expressions such as &local
+ * expand correctly.  It is still evaluated more than once, so it must be
+ * free of side effects.
+ */
+#define __yawn(args) do {                                               \
+        (args)->task = synctask_get ();                                 \
+        if ((args)->task)                                               \
+                break;  /* leaves the do/while: synctask path */        \
+        pthread_mutex_init (&(args)->mutex, NULL);                      \
+        pthread_cond_init (&(args)->cond, NULL);                        \
+        (args)->done = 0;                                               \
+        } while (0)
+
+
+/*
+ * Wake whoever is waiting on @args (pointer to a struct syncargs): the
+ * recorded synctask if there is one, otherwise the pthread waiter blocked
+ * on (args)->cond, by setting (args)->done under (args)->mutex.
+ *
+ * @args is parenthesized at every use so that expressions such as &local
+ * expand correctly.  It is still evaluated more than once, so it must be
+ * free of side effects.
+ */
+#define __wake(args) do {                                               \
+        if ((args)->task) {                                             \
+                synctask_wake ((args)->task);                           \
+        } else {                                                        \
+                pthread_mutex_lock (&(args)->mutex);                    \
+                {                                                       \
+                        (args)->done = 1;                               \
+                        pthread_cond_signal (&(args)->cond);            \
+                }                                                       \
+                pthread_mutex_unlock (&(args)->mutex);                  \
+        }                                                               \
+        } while (0)
+
+
+/*
+ * Block until __wake() has been called on @args (pointer to a struct
+ * syncargs).  Inside a synctask this yields the task; otherwise it waits
+ * on (args)->cond until (args)->done is set, then destroys the mutex and
+ * condition variable that __yawn() initialized.
+ *
+ * @args is parenthesized at every use so that expressions such as &local
+ * expand correctly.  It is still evaluated more than once, so it must be
+ * free of side effects.
+ */
+#define __yield(args) do {                                              \
+        if ((args)->task) {                                             \
+                synctask_yield ((args)->task);                          \
+        } else {                                                        \
+                pthread_mutex_lock (&(args)->mutex);                    \
+                {                                                       \
+                        /* loop guards against spurious wakeups */      \
+                        while (!(args)->done)                           \
+                                pthread_cond_wait (&(args)->cond,       \
+                                                   &(args)->mutex);     \
+                }                                                       \
+                pthread_mutex_unlock (&(args)->mutex);                  \
+                pthread_mutex_destroy (&(args)->mutex);                 \
+                pthread_cond_destroy (&(args)->cond);                   \
+        }                                                               \
+        } while (0)
#define SYNCOP(subvol, stb, cbk, op, params ...) do { \
struct synctask *task = NULL; \
+ call_frame_t *frame = NULL; \
\
task = synctask_get (); \
stb->task = task; \
+ if (task) \
+ frame = task->opframe; \
+ else \
+ frame = syncop_create_frame (THIS); \
+ \
+ if (task) { \
+ frame->root->uid = task->uid; \
+ frame->root->gid = task->gid; \
+ } \
+ \
+ __yawn (stb); \
+ \
+ STACK_WIND_COOKIE (frame, cbk, (void *)stb, subvol, \
+ op, params); \
\
- STACK_WIND_COOKIE (task->opframe, cbk, (void *)stb, \
- subvol, op, params); \
- task->state = SYNCTASK_SUSPEND; \
- synctask_yield (stb->task); \
- STACK_RESET (task->opframe->root); \
+ __yield (stb); \
+ if (task) \
+ STACK_RESET (frame->root); \
+ else \
+ STACK_DESTROY (frame->root); \
} while (0)
#define SYNCENV_DEFAULT_STACKSIZE (2 * 1024 * 1024)
-struct syncenv * syncenv_new ();
+struct syncenv * syncenv_new (size_t stacksize, int procmin, int procmax);
void syncenv_destroy (struct syncenv *);
void syncenv_scale (struct syncenv *env);
int synctask_new (struct syncenv *, synctask_fn_t, synctask_cbk_t, call_frame_t* frame, void *);
+struct synctask *synctask_create (struct syncenv *, synctask_fn_t,
+ synctask_cbk_t, call_frame_t *, void *);
+int synctask_join (struct synctask *task);
void synctask_wake (struct synctask *task);
void synctask_yield (struct synctask *task);
+void synctask_waitfor (struct synctask *task, int count);
+
+#define synctask_barrier_init(args) syncbarrier_init (&args->barrier)
+#define synctask_barrier_wait(args, n) syncbarrier_wait (&args->barrier, n)
+#define synctask_barrier_wake(args) syncbarrier_wake (&args->barrier)
+
+int synctask_setid (struct synctask *task, uid_t uid, gid_t gid);
+#define SYNCTASK_SETID(uid, gid) synctask_setid (synctask_get(), uid, gid);
+
+int syncopctx_setfsuid (void *uid);
+int syncopctx_setfsgid (void *gid);
+int syncopctx_setfsgroups (int count, const void *groups);
+
+/*
+ * Create a call frame on @this' pool for a syncop issued outside a
+ * synctask and fill in its credentials.
+ *
+ * pid is the calling process' pid.  uid, gid and the supplementary groups
+ * come from the syncopctx returned by syncopctx_getctx() when the matching
+ * SYNCOPCTX_* bit is set in its 'valid' mask, and from geteuid(),
+ * getegid() and getgroups() otherwise.
+ *
+ * Returns the new frame, or NULL if the frame or its group array could
+ * not be set up (the partially built stack is destroyed first).
+ */
+static inline call_frame_t *
+syncop_create_frame (xlator_t *this)
+{
+        struct syncopctx *sctx   = NULL;
+        call_frame_t     *newfrm = NULL;
+        int               count  = -1;
+
+        newfrm = create_frame (this, this->ctx->pool);
+        if (newfrm == NULL)
+                return NULL;
+
+        newfrm->root->pid = getpid ();
+
+        sctx = syncopctx_getctx ();
+
+        newfrm->root->uid = (sctx && (sctx->valid & SYNCOPCTX_UID))
+                            ? sctx->uid : geteuid ();
+        newfrm->root->gid = (sctx && (sctx->valid & SYNCOPCTX_GID))
+                            ? sctx->gid : getegid ();
+
+        if (sctx && (sctx->valid & SYNCOPCTX_GROUPS)) {
+                /* caller-supplied group list; may legitimately be empty */
+                count = sctx->ngrps;
+                if (count == 0 || sctx->groups == NULL)
+                        return newfrm;
+
+                if (call_stack_alloc_groups (newfrm->root, count) != 0)
+                        goto fail;
+
+                memcpy (newfrm->root->groups, sctx->groups,
+                        (sizeof (gid_t) * count));
+                return newfrm;
+        }
+
+        /* no override: size the list first, then fetch the process groups */
+        count = getgroups (0, 0);
+        if (count < 0)
+                goto fail;
+
+        if (call_stack_alloc_groups (newfrm->root, count) != 0)
+                goto fail;
+
+        if (getgroups (count, newfrm->root->groups) < 0)
+                goto fail;
+
+        return newfrm;
+
+fail:
+        STACK_DESTROY (newfrm->root);
+        return NULL;
+}
+
+int synclock_init (synclock_t *lock);
+int synclock_destory (synclock_t *lock);
+int synclock_lock (synclock_t *lock);
+int synclock_trylock (synclock_t *lock);
+int synclock_unlock (synclock_t *lock);
+
+
+int syncbarrier_init (syncbarrier_t *barrier);
+int syncbarrier_wait (syncbarrier_t *barrier, int waitfor);
+int syncbarrier_wake (syncbarrier_t *barrier);
+int syncbarrier_destroy (syncbarrier_t *barrier);
int syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req,
/* out */
@@ -174,13 +364,13 @@ int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name);
int syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name);
int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *dict);
+ fd_t *fd, dict_t *dict, struct iatt *iatt);
int syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd);
int syncop_close (fd_t *fd);
int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size,
off_t offset, struct iobref *iobref, uint32_t flags);
-int syncop_writev (xlator_t *subvol, fd_t *fd, struct iovec *vector,
+int syncop_writev (xlator_t *subvol, fd_t *fd, const struct iovec *vector,
int32_t count, off_t offset, struct iobref *iobref,
uint32_t flags);
int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
@@ -192,14 +382,31 @@ int syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset);
int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset);
int syncop_unlink (xlator_t *subvol, loc_t *loc);
+int syncop_rmdir (xlator_t *subvol, loc_t *loc);
-int syncop_fsync (xlator_t *subvol, fd_t *fd);
+int syncop_fsync (xlator_t *subvol, fd_t *fd, int dataonly);
+int syncop_flush (xlator_t *subvol, fd_t *fd);
int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf);
int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf);
-int syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict);
+int syncop_symlink (xlator_t *subvol, loc_t *loc, const char *newpath,
+ dict_t *dict, struct iatt *iatt);
int syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size);
int syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
- dict_t *dict);
+ dict_t *dict, struct iatt *iatt);
+int syncop_mkdir (xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *dict,
+ struct iatt *iatt);
int syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);
+int syncop_fsyncdir (xlator_t *subvol, fd_t *fd, int datasync);
+int syncop_access (xlator_t *subvol, loc_t *loc, int32_t mask);
+int syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len);
+int syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len);
+
+int syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, size_t len);
+
+int syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);
+
+int syncop_lk (xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock);
+
#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index bb834dbfd..e8954cc23 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -458,3 +458,26 @@ sys_access (const char *pathname, int mode)
{
return access (pathname, mode);
}
+
+
+/*
+ * Preallocate @len bytes starting at @offset in @fd.
+ *
+ * Uses fallocate() when available.  Otherwise falls back to
+ * posix_fallocate(), which cannot honour any @mode flag (e.g. keep-size)
+ * and therefore fails with EOPNOTSUPP when @mode is non-zero.  With
+ * neither primitive available the call fails with ENOSYS.
+ *
+ * Returns 0 on success, -1 with errno set on failure.
+ */
+int
+sys_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+#if defined HAVE_FALLOCATE
+        return fallocate(fd, mode, offset, len);
+#elif defined HAVE_POSIX_FALLOCATE
+        int ret = 0;
+
+        if (mode) {
+                /* keep size not supported */
+                errno = EOPNOTSUPP;
+                return -1;
+        }
+
+        /*
+         * posix_fallocate() returns the error number directly and does
+         * not set errno; translate to the -1/errno convention used by
+         * the other sys_* wrappers.
+         */
+        ret = posix_fallocate(fd, offset, len);
+        if (ret != 0) {
+                errno = ret;
+                return -1;
+        }
+
+        return 0;
+#else
+        errno = ENOSYS;
+        return -1;
+#endif
+}
+
diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/syscall.h
index d5c6ce5f6..f1c9f58c3 100644
--- a/libglusterfs/src/syscall.h
+++ b/libglusterfs/src/syscall.h
@@ -139,4 +139,6 @@ sys_access (const char *pathname, int mode);
int
sys_ftruncate (int fd, off_t length);
+int sys_fallocate(int fd, int mode, off_t offset, off_t len);
+
#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
index ae40142ad..a059cc212 100644
--- a/libglusterfs/src/timer.c
+++ b/libglusterfs/src/timer.c
@@ -17,19 +17,18 @@
#include "logging.h"
#include "common-utils.h"
#include "globals.h"
-
-#define TS(tv) ((((unsigned long long) tv.tv_sec) * 1000000) + (tv.tv_usec))
+#include "timespec.h"
gf_timer_t *
gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
+ struct timespec delta,
gf_timer_cbk_t callbk,
void *data)
{
gf_timer_registry_t *reg = NULL;
gf_timer_t *event = NULL;
gf_timer_t *trav = NULL;
- unsigned long long at = 0L;
+ uint64_t at = 0;
if (ctx == NULL)
{
@@ -48,10 +47,8 @@ gf_timer_call_after (glusterfs_ctx_t *ctx,
if (!event) {
return NULL;
}
- gettimeofday (&event->at, NULL);
- event->at.tv_usec = ((event->at.tv_usec + delta.tv_usec) % 1000000);
- event->at.tv_sec += ((event->at.tv_usec + delta.tv_usec) / 1000000);
- event->at.tv_sec += delta.tv_sec;
+ timespec_now (&event->at);
+ timespec_adjust_delta (&event->at, delta);
at = TS (event->at);
event->callbk = callbk;
event->data = data;
@@ -127,7 +124,7 @@ void *
gf_timer_proc (void *ctx)
{
gf_timer_registry_t *reg = NULL;
- const struct timespec sleepts = {.tv_sec = 1, .tv_nsec = 0, };
+ const struct timespec sleepts = {.tv_sec = 1, .tv_nsec = 0, };
if (ctx == NULL)
{
@@ -142,14 +139,14 @@ gf_timer_proc (void *ctx)
}
while (!reg->fin) {
- unsigned long long now;
- struct timeval now_tv;
+ uint64_t now;
+ struct timespec now_ts;
gf_timer_t *event = NULL;
- gettimeofday (&now_tv, NULL);
- now = TS (now_tv);
+ timespec_now (&now_ts);
+ now = TS (now_ts);
while (1) {
- unsigned long long at;
+ uint64_t at;
char need_cbk = 0;
pthread_mutex_lock (&reg->lock);
@@ -213,7 +210,7 @@ gf_timer_registry_init (glusterfs_ctx_t *ctx)
reg->stale.prev = &reg->stale;
ctx->timer = reg;
- pthread_create (&reg->th, NULL, gf_timer_proc, ctx);
+ gf_thread_create (&reg->th, NULL, gf_timer_proc, ctx);
}
out:
return ctx->timer;
diff --git a/libglusterfs/src/timer.h b/libglusterfs/src/timer.h
index 2954f6aff..2f963adbf 100644
--- a/libglusterfs/src/timer.h
+++ b/libglusterfs/src/timer.h
@@ -25,7 +25,7 @@ typedef void (*gf_timer_cbk_t) (void *);
struct _gf_timer {
struct _gf_timer *next, *prev;
- struct timeval at;
+ struct timespec at;
gf_timer_cbk_t callbk;
void *data;
xlator_t *xl;
@@ -44,7 +44,7 @@ typedef struct _gf_timer_registry gf_timer_registry_t;
gf_timer_t *
gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
+ struct timespec delta,
gf_timer_cbk_t cbk,
void *data);
diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
new file mode 100644
index 000000000..a0c281a1e
--- /dev/null
+++ b/libglusterfs/src/timespec.c
@@ -0,0 +1,68 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || defined GF_BSD_HOST_OS
+#include <time.h>
+#include <sys/time.h>
+#endif
+
+#if defined GF_DARWIN_HOST_OS
+#include <mach/mach_time.h>
+#endif
+
+#include "logging.h"
+#include "time.h"
+
+
+/* Convert the microsecond-resolution @tv into nanosecond-resolution @ts. */
+void tv2ts (struct timeval tv, struct timespec *ts)
+{
+        const long nsec_per_usec = 1000;
+
+        ts->tv_nsec = tv.tv_usec * nsec_per_usec;
+        ts->tv_sec = tv.tv_sec;
+}
+
+/*
+ * Store the current time in @ts.
+ *
+ * Linux/Solaris/BSD: CLOCK_MONOTONIC via clock_gettime(); if that fails,
+ * fall back to gettimeofday() and log the resulting value.  A successful
+ * clock_gettime() returns without logging.
+ * Darwin: mach_absolute_time() ticks scaled to nanoseconds with the
+ * numer/denom ratio reported by mach_timebase_info().
+ */
+void timespec_now (struct timespec *ts)
+{
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || defined GF_BSD_HOST_OS
+
+        if (0 == clock_gettime(CLOCK_MONOTONIC, ts))
+                return;
+        else {
+                struct timeval tv;
+                if (0 == gettimeofday(&tv, NULL))
+                        tv2ts(tv, ts);
+        }
+#elif defined GF_DARWIN_HOST_OS
+        mach_timebase_info_data_t tb = { 0 };
+        uint64_t ticks = 0;
+        uint64_t nsecs = 0;
+
+        mach_timebase_info (&tb);
+
+        ticks = mach_absolute_time();
+        /*
+         * nanoseconds = ticks * numer / denom.  Guard the division in
+         * case mach_timebase_info() left @tb zeroed; ticks are then
+         * used unscaled.
+         */
+        nsecs = (tb.denom != 0) ? (ticks * tb.numer / tb.denom) : ticks;
+
+        ts->tv_sec = nsecs / UINT64_C(1000000000);
+        ts->tv_nsec = nsecs % UINT64_C(1000000000);
+
+#endif /* Platform verification */
+        /* cast explicitly: time_t/long need not be 64 bits wide */
+        gf_log_callingfn ("timer", GF_LOG_DEBUG, "%"PRIu64".%09"PRIu64,
+                          (uint64_t) ts->tv_sec, (uint64_t) ts->tv_nsec);
+}
+
+/*
+ * Advance @ts by @delta, carrying nanosecond overflow into tv_sec.
+ * Both tv_nsec values are expected to be normalized (0 .. 999999999).
+ */
+void timespec_adjust_delta (struct timespec *ts, struct timespec delta)
+{
+        /*
+         * Add first and reduce afterwards: the carry has to be derived
+         * from the un-reduced sum, not from the already wrapped tv_nsec.
+         */
+        int64_t nsec = (int64_t) ts->tv_nsec + delta.tv_nsec;
+
+        ts->tv_sec += delta.tv_sec + (nsec / 1000000000);
+        ts->tv_nsec = (nsec % 1000000000);
+}
diff --git a/libglusterfs/src/timespec.h b/libglusterfs/src/timespec.h
new file mode 100644
index 000000000..490255df9
--- /dev/null
+++ b/libglusterfs/src/timespec.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INCLUDE_TIMESPEC_H__
+#define __INCLUDE_TIMESPEC_H__
+
+#include <stdint.h>
+#include <time.h>       /* struct timespec */
+#include <sys/time.h>   /* struct timeval */
+
+/*
+ * Flatten a struct timespec (an lvalue or expression of struct type, not a
+ * pointer) into a single nanosecond count.  The argument is parenthesized
+ * so that expressions such as *ptr expand correctly.
+ */
+#define TS(ts) (((ts).tv_sec * 1000000000LL) + (ts).tv_nsec)
+#define NANO (+1.0E-9)                  /* seconds per nanosecond */
+#define GIGA UINT64_C(1000000000)       /* nanoseconds per second */
+
+/* Convert @tv (microsecond resolution) into @ts (nanosecond resolution). */
+void tv2ts (struct timeval tv, struct timespec *ts);
+/* Store the current time in @ts. */
+void timespec_now (struct timespec *ts);
+/* Advance @ts in place by @delta. */
+void timespec_adjust_delta (struct timespec *ts, struct timespec delta);
+
+#endif /* __INCLUDE_TIMESPEC_H__ */
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c
index b7c597842..f96bbebf6 100644
--- a/libglusterfs/src/trie.c
+++ b/libglusterfs/src/trie.c
@@ -114,8 +114,7 @@ trienode_free (trienode_t *node)
trienode_free (trav);
}
- if (node->data)
- GF_FREE (node->data);
+ GF_FREE (node->data);
GF_FREE (node);
}
@@ -373,8 +372,7 @@ trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec)
static int
trienode_reset (trienode_t *node, void *data)
{
- if (node->data)
- GF_FREE (node->data);
+ GF_FREE (node->data);
return 0;
}
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 470087df9..a277c58a8 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -79,6 +79,9 @@ fill_defaults (xlator_t *xl)
SET_DEFAULT_FOP (fxattrop);
SET_DEFAULT_FOP (setattr);
SET_DEFAULT_FOP (fsetattr);
+ SET_DEFAULT_FOP (fallocate);
+ SET_DEFAULT_FOP (discard);
+ SET_DEFAULT_FOP (zerofill);
SET_DEFAULT_FOP (getspec);
@@ -119,18 +122,9 @@ xlator_volopt_dynload (char *xlator_type, void **dl_handle,
int ret = -1;
char *name = NULL;
void *handle = NULL;
- volume_opt_list_t *vol_opt = NULL;
GF_VALIDATE_OR_GOTO ("xlator", xlator_type, out);
- GF_ASSERT (dl_handle);
-
- if (*dl_handle)
- if (dlclose (*dl_handle))
- gf_log ("xlator", GF_LOG_WARNING, "Unable to close "
- "previously opened handle( may be stale)."
- "Ignoring the invalid handle");
-
ret = gf_asprintf (&name, "%s/%s.so", XLATORDIR, xlator_type);
if (-1 == ret) {
gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
@@ -146,28 +140,20 @@ xlator_volopt_dynload (char *xlator_type, void **dl_handle,
gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
goto out;
}
- *dl_handle = handle;
-
-
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
-
- if (!vol_opt) {
- goto out;
- }
- if (!(vol_opt->given_opt = dlsym (handle, "options"))) {
+ if (!(opt_list->given_opt = dlsym (handle, "options"))) {
dlerror ();
- gf_log ("xlator", GF_LOG_DEBUG,
- "Strict option validation not enforced -- neglecting");
+ gf_log ("xlator", GF_LOG_ERROR,
+ "Failed to load xlator opt table");
+ goto out;
}
- opt_list->given_opt = vol_opt->given_opt;
- INIT_LIST_HEAD (&vol_opt->list);
- list_add_tail (&vol_opt->list, &opt_list->list);
+ *dl_handle = handle;
ret = 0;
out:
+ GF_FREE (name);
+
gf_log ("xlator", GF_LOG_DEBUG, "Returning %d", ret);
return ret;
@@ -181,7 +167,7 @@ xlator_dynload (xlator_t *xl)
char *name = NULL;
void *handle = NULL;
volume_opt_list_t *vol_opt = NULL;
-
+ class_methods_t *vtbl = NULL;
GF_VALIDATE_OR_GOTO ("xlator", xl, out);
@@ -216,21 +202,42 @@ xlator_dynload (xlator_t *xl)
goto out;
}
- if (!(xl->init = dlsym (handle, "init"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
- dlerror ());
- goto out;
- }
+ /*
+ * If class_methods exists, its contents override any definitions of
+ * init or fini for that translator. Otherwise, we fall back to the
+ * older method of looking for init and fini directly.
+ */
+ vtbl = dlsym(handle,"class_methods");
+ if (vtbl) {
+ xl->init = vtbl->init;
+ xl->fini = vtbl->fini;
+ xl->reconfigure = vtbl->reconfigure;
+ xl->notify = vtbl->notify;
+ }
+ else {
+ if (!(*VOID(&xl->init) = dlsym (handle, "init"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
+ dlerror ());
+ goto out;
+ }
- if (!(xl->fini = dlsym (handle, "fini"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
- dlerror ());
- goto out;
- }
+ if (!(*VOID(&(xl->fini)) = dlsym (handle, "fini"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
+ dlerror ());
+ goto out;
+ }
+ if (!(*VOID(&(xl->reconfigure)) = dlsym (handle,
+ "reconfigure"))) {
+ gf_log ("xlator", GF_LOG_TRACE,
+ "dlsym(reconfigure) on %s -- neglecting",
+ dlerror());
+ }
+ if (!(*VOID(&(xl->notify)) = dlsym (handle, "notify"))) {
+ gf_log ("xlator", GF_LOG_TRACE,
+ "dlsym(notify) on %s -- neglecting",
+ dlerror ());
+ }
- if (!(xl->notify = dlsym (handle, "notify"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(notify) on %s -- neglecting", dlerror ());
}
if (!(xl->dumpops = dlsym (handle, "dumpops"))) {
@@ -238,18 +245,12 @@ xlator_dynload (xlator_t *xl)
"dlsym(dumpops) on %s -- neglecting", dlerror ());
}
- if (!(xl->mem_acct_init = dlsym (handle, "mem_acct_init"))) {
+ if (!(*VOID(&(xl->mem_acct_init)) = dlsym (handle, "mem_acct_init"))) {
gf_log (xl->name, GF_LOG_TRACE,
"dlsym(mem_acct_init) on %s -- neglecting",
dlerror ());
}
- if (!(xl->reconfigure = dlsym (handle, "reconfigure"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(reconfigure) on %s -- neglecting",
- dlerror());
- }
-
vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
gf_common_mt_volume_opt_list_t);
@@ -262,6 +263,7 @@ xlator_dynload (xlator_t *xl)
gf_log (xl->name, GF_LOG_TRACE,
"Strict option validation not enforced -- neglecting");
}
+ INIT_LIST_HEAD (&vol_opt->list);
list_add_tail (&vol_opt->list, &xl->volume_options);
fill_defaults (xl);
@@ -269,8 +271,7 @@ xlator_dynload (xlator_t *xl)
ret = 0;
out:
- if (name)
- GF_FREE (name);
+ GF_FREE (name);
return ret;
}
@@ -320,6 +321,24 @@ out:
}
+/*
+ * Post-order walk of the translator graph rooted at @this: every entry of
+ * this->children is visited recursively, in list order, before @fn is
+ * invoked on @this itself.  @data is passed through to @fn unchanged.
+ */
+void
+xlator_foreach_depth_first (xlator_t *this,
+                            void (*fn)(xlator_t *each, void *data),
+                            void *data)
+{
+        xlator_list_t *child = NULL;
+
+        for (child = this->children; child != NULL; child = child->next)
+                xlator_foreach_depth_first (child->xlator, fn, data);
+
+        fn (this, data);
+}
+
+
xlator_t *
xlator_search_by_name (xlator_t *any, const char *name)
{
@@ -343,7 +362,6 @@ out:
return search;
}
-
static int
__xlator_init(xlator_t *xl)
{
@@ -459,12 +477,12 @@ xlator_mem_acct_init (xlator_t *xl, int num_types)
int i = 0;
int ret = 0;
- if (!gf_mem_acct_is_enabled())
- return 0;
-
if (!xl)
return -1;
+ if (!xl->ctx->mem_acct_enable)
+ return 0;
+
xl->mem_acct.num_types = num_types;
xl->mem_acct.rec = CALLOC(num_types, sizeof(struct mem_acct_rec));
@@ -498,10 +516,26 @@ out:
return;
}
+/*
+ * Release every node of the singly linked xlator_list_t chain starting at
+ * @list with GF_FREE().  Only the list nodes are freed, not the
+ * translators they point to.  A NULL @list is a no-op.  Always returns 0.
+ */
+int
+xlator_list_destroy (xlator_list_t *list)
+{
+        xlator_list_t *node  = NULL;
+        xlator_list_t *after = NULL;
+
+        for (node = list; node != NULL; node = after) {
+                /* fetch the successor before the node is released */
+                after = node->next;
+                GF_FREE (node);
+        }
+
+        return 0;
+}
+
int
xlator_tree_free (xlator_t *tree)
{
+ volume_opt_list_t *vol_opt = NULL;
+ volume_opt_list_t *tmp = NULL;
xlator_t *trav = tree;
xlator_t *prev = tree;
@@ -512,9 +546,19 @@ xlator_tree_free (xlator_t *tree)
while (prev) {
trav = prev->next;
- dict_destroy (prev->options);
+ if (prev->dlhandle)
+ dlclose (prev->dlhandle);
+ dict_unref (prev->options);
GF_FREE (prev->name);
GF_FREE (prev->type);
+ xlator_list_destroy (prev->children);
+ xlator_list_destroy (prev->parents);
+
+ list_for_each_entry_safe (vol_opt, tmp, &prev->volume_options,
+ list) {
+ list_del_init (&vol_opt->list);
+ GF_FREE (vol_opt);
+ }
GF_FREE (prev);
prev = trav;
}
@@ -580,6 +624,31 @@ out:
return ret;
}
+/*
+ * Resolve the gfid for @loc into @gfid.
+ *
+ * @gfid is cleared first; it is then filled from loc->gfid when that is
+ * non-null, else from loc->inode->gfid when an inode with a non-null gfid
+ * is attached.  With a NULL @loc (or no usable gfid) @gfid stays cleared.
+ * A NULL @gfid makes the call a no-op.
+ */
+void
+loc_gfid (loc_t *loc, uuid_t gfid)
+{
+        if (!gfid)
+                return;
+
+        uuid_clear (gfid);
+
+        if (!loc)
+                return;
+
+        if (!uuid_is_null (loc->gfid)) {
+                uuid_copy (gfid, loc->gfid);
+                return;
+        }
+
+        if (loc->inode && (!uuid_is_null (loc->inode->gfid)))
+                uuid_copy (gfid, loc->inode->gfid);
+}
+
+/*
+ * Return uuid_utoa() applied to the gfid that loc_gfid() resolves for
+ * @loc (the cleared uuid when nothing can be resolved).
+ */
+char*
+loc_gfid_utoa (loc_t *loc)
+{
+        uuid_t resolved;
+
+        loc_gfid (loc, resolved);
+
+        return uuid_utoa (resolved);
+}
+
int
loc_copy (loc_t *dst, loc_t *src)
{
@@ -619,22 +688,17 @@ err:
return ret;
}
-
-int
-xlator_list_destroy (xlator_list_t *list)
+gf_boolean_t
+loc_is_root (loc_t *loc)
{
- xlator_list_t *next = NULL;
-
- while (list) {
- next = list->next;
- GF_FREE (list);
- list = next;
+ if (loc && __is_root_gfid (loc->gfid)) {
+ return _gf_true;
+ } else if (loc && loc->inode && __is_root_gfid (loc->inode->gfid)) {
+ return _gf_true;
}
-
- return 0;
+ return _gf_false;
}
-
int
xlator_destroy (xlator_t *xl)
{
@@ -644,10 +708,8 @@ xlator_destroy (xlator_t *xl)
if (!xl)
return 0;
- if (xl->name)
- GF_FREE (xl->name);
- if (xl->type)
- GF_FREE (xl->type);
+ GF_FREE (xl->name);
+ GF_FREE (xl->type);
if (xl->dlhandle)
dlclose (xl->dlhandle);
if (xl->options)
@@ -722,7 +784,7 @@ is_gf_log_command (xlator_t *this, const char *name, char *value)
goto out;
}
- ctx = glusterfs_ctx_get();
+ ctx = this->ctx;
if (!ctx)
goto out;
if (!ctx->active)
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index b7b59fac9..b57e5873e 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -66,6 +66,7 @@ typedef int32_t (*event_notify_fn_t) (xlator_t *this, int32_t event, void *data,
#include "globals.h"
#include "iatt.h"
#include "options.h"
+#include "client_t.h"
struct _loc {
@@ -417,6 +418,30 @@ typedef int32_t (*fop_fsetattr_cbk_t) (call_frame_t *frame,
struct iatt *preop_stbuf,
struct iatt *postop_stbuf, dict_t *xdata);
+/*
+ * Callback signatures for the fallocate, discard and zerofill fops.  All
+ * three share one shape: the op result (@op_ret / @op_errno), the iatt
+ * before and after the operation, and an xdata dict.  They back the
+ * fallocate_cbk / discard_cbk / zerofill_cbk members of struct xlator_fops.
+ */
+typedef int32_t (*fop_fallocate_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_discard_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_zerofill_cbk_t) (call_frame_t *frame,
+ void *cookie,
+ xlator_t *this,
+ int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
typedef int32_t (*fop_lookup_t) (call_frame_t *frame,
xlator_t *this,
loc_t *loc,
@@ -634,6 +659,26 @@ typedef int32_t (*fop_fsetattr_t) (call_frame_t *frame,
struct iatt *stbuf,
int32_t valid, dict_t *xdata);
+/*
+ * Entry-point signatures for the fallocate, discard and zerofill fops,
+ * backing the fallocate / discard / zerofill members of struct
+ * xlator_fops.  Each acts on the byte range [@offset, @offset + @len) of
+ * @fd; fallocate additionally takes @keep_size.
+ * NOTE(review): exact @keep_size semantics are not visible here --
+ * presumably the fallocate(2) mode flags; confirm against the posix xlator.
+ */
+typedef int32_t (*fop_fallocate_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ int32_t keep_size,
+ off_t offset,
+ size_t len,
+ dict_t *xdata);
+
+typedef int32_t (*fop_discard_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len,
+ dict_t *xdata);
+typedef int32_t (*fop_zerofill_t) (call_frame_t *frame,
+ xlator_t *this,
+ fd_t *fd,
+ off_t offset,
+ size_t len,
+ dict_t *xdata);
struct xlator_fops {
fop_lookup_t lookup;
@@ -678,6 +723,9 @@ struct xlator_fops {
fop_setattr_t setattr;
fop_fsetattr_t fsetattr;
fop_getspec_t getspec;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
/* these entries are used for a typechecking hack in STACK_WIND _only_ */
fop_lookup_cbk_t lookup_cbk;
@@ -722,6 +770,9 @@ struct xlator_fops {
fop_setattr_cbk_t setattr_cbk;
fop_fsetattr_cbk_t fsetattr_cbk;
fop_getspec_cbk_t getspec_cbk;
+ fop_fallocate_cbk_t fallocate_cbk;
+ fop_discard_cbk_t discard_cbk;
+ fop_zerofill_cbk_t zerofill_cbk;
};
typedef int32_t (*cbk_forget_t) (xlator_t *this,
@@ -730,10 +781,17 @@ typedef int32_t (*cbk_forget_t) (xlator_t *this,
typedef int32_t (*cbk_release_t) (xlator_t *this,
fd_t *fd);
+typedef int32_t (*cbk_invalidate_t)(xlator_t *this, inode_t *inode);
+
+typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client);
+
struct xlator_cbks {
- cbk_forget_t forget;
- cbk_release_t release;
- cbk_release_t releasedir;
+ cbk_forget_t forget;
+ cbk_release_t release;
+ cbk_release_t releasedir;
+ cbk_invalidate_t invalidate;
+ cbk_client_t client_destroy;
+ cbk_client_t client_disconnect;
};
typedef int32_t (*dumpop_priv_t) (xlator_t *this);
@@ -821,8 +879,17 @@ struct _xlator {
/* for the memory pool of 'frame->local' */
struct mem_pool *local_pool;
+ gf_boolean_t is_autoloaded;
};
+/*
+ * Optional per-translator method table.  xlator_dynload() looks up a
+ * symbol named "class_methods" in the translator's shared object; when it
+ * exists, these four pointers are copied into the xlator_t in place of
+ * the individually looked-up init/fini/reconfigure/notify symbols.
+ */
+typedef struct {
+ int32_t (*init) (xlator_t *this);
+ void (*fini) (xlator_t *this);
+ int32_t (*reconfigure) (xlator_t *this,
+ dict_t *options);
+ event_notify_fn_t notify;
+} class_methods_t;
+
#define xlator_has_parent(xl) (xl->parents != NULL)
#define XLATOR_NOTIFY(_xl, params ...) \
@@ -860,6 +927,11 @@ void xlator_foreach (xlator_t *this,
void *data),
void *data);
+void xlator_foreach_depth_first (xlator_t *this,
+ void (*fn) (xlator_t *each,
+ void *data),
+ void *data);
+
xlator_t *xlator_search_by_name (xlator_t *any, const char *name);
void inode_destroy_notify (inode_t *inode, const char *xlname);
@@ -868,10 +940,21 @@ int loc_copy (loc_t *dst, loc_t *src);
#define loc_dup(src, dst) loc_copy(dst, src)
void loc_wipe (loc_t *loc);
int loc_path (loc_t *loc, const char *bname);
+void loc_gfid (loc_t *loc, uuid_t gfid);
+char* loc_gfid_utoa (loc_t *loc);
+gf_boolean_t loc_is_root (loc_t *loc);
int xlator_mem_acct_init (xlator_t *xl, int num_types);
int is_gf_log_command (xlator_t *trans, const char *name, char *value);
int glusterd_check_log_level (const char *value);
int xlator_volopt_dynload (char *xlator_type, void **dl_handle,
volume_opt_list_t *vol_opt_handle);
-int32_t glusterfs_rebalance_event_notify (dict_t *dict);
+enum gf_hdsk_event_notify_op {
+ GF_EN_DEFRAG_STATUS,
+ GF_EN_MAX,
+};
+gf_boolean_t
+is_graph_topology_equal (glusterfs_graph_t *graph1, glusterfs_graph_t *graph2);
+int
+glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp,
+ glusterfs_ctx_t *ctx, const char *oldvolfile);
#endif /* _XLATOR_H */
diff --git a/libglusterfsclient/src/Makefile.am b/libglusterfsclient/src/Makefile.am
deleted file mode 100644
index 32811c0d5..000000000
--- a/libglusterfsclient/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-lib_LTLIBRARIES = libglusterfsclient.la
-noinst_HEADERS = libglusterfsclient-internals.h
-libglusterfsclient_HEADERS = libglusterfsclient.h
-libglusterfsclientdir = $(includedir)
-
-libglusterfsclient_la_SOURCES = libglusterfsclient.c libglusterfsclient-dentry.c
-libglusterfsclient_la_CFLAGS = -fPIC -Wall
-libglusterfsclient_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-libglusterfsclient_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D$(GF_HOST_OS) -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_CFLAGS)
-libglusterfsclient_la_LDFLAGS = -shared -nostartfiles
-
-CLEANFILES =
-
-$(top_builddir)/libglusterfs/src/libglusterfs.la:
- $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
-
diff --git a/libglusterfsclient/src/libglusterfsclient-dentry.c b/libglusterfsclient/src/libglusterfsclient-dentry.c
deleted file mode 100644
index 3fa5f6e1e..000000000
--- a/libglusterfsclient/src/libglusterfsclient-dentry.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include <libgen.h>
-
-#define LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE(_new_loc, _loc, _parent, \
- _resolved) do { \
- size_t pathlen = 0; \
- size_t resolvedlen = 0; \
- char *path = NULL; \
- int pad = 0; \
- pathlen = strlen (_loc->path) + 1; \
- path = CALLOC (1, pathlen); \
- _new_loc.parent = _parent; \
- resolvedlen = strlen (_resolved); \
- strncpy (path, _resolved, resolvedlen); \
- if (resolvedlen == 1) /* only root resolved */ \
- pad = 0; \
- else { \
- pad = 1; \
- path[resolvedlen] = '/'; \
- } \
- strcpy_till (path + resolvedlen + pad, \
- loc->path + resolvedlen + pad, '/'); \
- _new_loc.path = path; \
- _new_loc.name = strrchr (path, '/'); \
- if (_new_loc.name) \
- _new_loc.name++; \
- }while (0);
-
-
-/* strcpy_till - copy @dname to @dest, until 'delim' is encountered in @dest
- * @dest - destination string
- * @dname - source string
- * @delim - delimiter character
- *
- * return - NULL is returned if '0' is encountered in @dname, otherwise returns
- * a pointer to remaining string begining in @dest.
- */
-static char *
-strcpy_till (char *dest, const char *dname, char delim)
-{
- char *src = NULL;
- int idx = 0;
- char *ret = NULL;
-
- src = (char *)dname;
- while (src[idx] && (src[idx] != delim)) {
- dest[idx] = src[idx];
- idx++;
- }
-
- dest[idx] = 0;
-
- if (src[idx] == 0)
- ret = NULL;
- else
- ret = &(src[idx]);
-
- return ret;
-}
-
-/* __libgf_client_path_to_parenti - derive parent inode for @path. if immediate
- * parent is not available in the dentry cache, return nearest
- * available parent inode and set @reslv to the path of
- * the returned directory.
- *
- * @itable - inode table
- * @path - path whose parent has to be looked up.
- * @reslv - if immediate parent is not available, reslv will be set to path of the
- * resolved parent.
- *
- * return - should never return NULL. should at least return '/' inode.
- */
-static inode_t *
-__libgf_client_path_to_parenti (libglusterfs_client_ctx_t *ctx,
- inode_table_t *itable, const char *path,
- char **reslv)
-{
- char *resolved_till = NULL;
- char *strtokptr = NULL;
- char *component = NULL;
- char *next_component = NULL;
- char *pathdup = NULL;
- inode_t *curr = NULL;
- inode_t *parent = NULL;
- size_t pathlen = 0;
- loc_t rootloc = {0, };
- int ret = -1;
-
- pathlen = STRLEN_0 (path);
- resolved_till = CALLOC (1, pathlen);
-
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", resolved_till, out);
- pathdup = strdup (path);
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", pathdup, out);
-
- parent = inode_ref (itable->root);
- /* If the root inode's is outdated, send a revalidate on it.
- * A revalidate on root inode also reduces the window in which an
- * op will fail over distribute because the layout of the root
- * directory did not get constructed when we sent the lookup on
- * root in glusterfs_init. That can happen when not all children of a
- * distribute volume were up at the time of glusterfs_init.
- */
- if (!libgf_is_iattr_cache_valid (ctx, parent, NULL,
- LIBGF_VALIDATE_LOOKUP)) {
- libgf_client_loc_fill (&rootloc, ctx, 1, 0, "/");
- ret = libgf_client_lookup (ctx, &rootloc, NULL, NULL, NULL);
- if (ret == -1) {
- gf_log ("libglusterfsclient-dentry", GF_LOG_ERROR,
- "Root inode revalidation failed");
- inode_unref (parent);
- parent = NULL;
- goto out;
- }
- libgf_client_loc_wipe (&rootloc);
- }
-
- curr = NULL;
-
- component = strtok_r (pathdup, "/", &strtokptr);
-
- while (component) {
- curr = inode_search (itable, parent->ino, component);
- if (!curr) {
- break;
- }
- if (!libgf_is_iattr_cache_valid (ctx, curr, NULL,
- LIBGF_VALIDATE_LOOKUP))
- break;
-
- /* It is OK to append the component even if it is the
- last component in the path, because, if 'next_component'
- returns NULL, @parent will remain the same and
- @resolved_till will not be sent back
- */
- strcat (resolved_till, "/");
- strcat (resolved_till, component);
-
- next_component = strtok_r (NULL, "/", &strtokptr);
-
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- /* will break */
- inode_unref (curr);
- }
-
- component = next_component;
- }
-
- if (resolved_till[0] == '\0') {
- strcat (resolved_till, "/");
- }
-
- free (pathdup);
-
- if (reslv) {
- *reslv = resolved_till;
- } else {
- FREE (resolved_till);
- }
-
-out:
- return parent;
-}
-
-static inline void
-libgf_client_update_resolved (const char *path, char *resolved)
-{
- int32_t pathlen = 0;
- char *tmp = NULL, *dest = NULL, *dname = NULL;
- char append_slash = 0;
-
- pathlen = strlen (resolved);
- tmp = (char *)(resolved + pathlen);
- if (*((char *) (resolved + pathlen - 1)) != '/') {
- tmp[0] = '/';
- append_slash = 1;
- }
-
- if (append_slash) {
- dest = tmp + 1;
- } else {
- dest = tmp;
- }
-
- if (*((char *) path + pathlen) == '/') {
- dname = (char *) path + pathlen + 1;
- } else {
- dname = (char *) path + pathlen;
- }
-
- strcpy_till (dest, dname, '/');
-}
-
-/* __do_path_resolve - resolve @loc->path into @loc->inode and @loc->parent. also
- * update the dentry cache
- *
- * @loc - loc to resolve.
- * @ctx - libglusterfsclient context
- * @lookup_basename - flag whether to lookup basename(loc->path)
- *
- * return - 0 on success
- * -1 on failure
- *
- */
-static int32_t
-__do_path_resolve (loc_t *loc, libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- int32_t op_ret = -1;
- char *resolved = NULL;
- inode_t *parent = NULL, *inode = NULL;
- dentry_t *dentry = NULL;
- loc_t new_loc = {0, };
- char *pathname = NULL, *directory = NULL;
- char *file = NULL;
-
- parent = loc->parent;
- if (parent) {
- inode_ref (parent);
- gf_log ("libglusterfsclient-dentry", GF_LOG_DEBUG,
- "loc->parent(%"PRId64") already present. sending "
- "lookup for %"PRId64"/%s", parent->ino, parent->ino,
- loc->path);
- resolved = strdup (loc->path);
- resolved = dirname (resolved);
- } else {
- parent = __libgf_client_path_to_parenti (ctx, ctx->itable,
- loc->path, &resolved);
- }
-
- if (parent == NULL) {
- /* fire in the bush.. run! run!! run!!! */
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_CRITICAL,
- "failed to get parent inode number");
- op_ret = -1;
- goto out;
- }
-
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_DEBUG,
- "resolved path(%s) till %"PRId64"(%s). "
- "sending lookup for remaining path",
- loc->path, parent->ino, resolved);
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- pathname = NULL;
-
- while (strcmp (resolved, directory) != 0)
- {
- dentry = NULL;
-
- LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE (new_loc, loc, parent,
- resolved);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- pathname = strdup (new_loc.path);
- file = basename (pathname);
-
- new_loc.inode = inode_search (ctx->itable, parent->ino, file);
- if (new_loc.inode) {
- if (libgf_is_iattr_cache_valid (ctx, new_loc.inode,
- NULL,
- LIBGF_VALIDATE_LOOKUP))
- dentry = dentry_search_for_inode (new_loc.inode,
- parent->ino,
- file);
- }
-
- if (dentry == NULL) {
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- inode_ref (new_loc.parent);
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
- }
-
- parent = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
-
- libgf_client_update_resolved (loc->path, resolved);
- }
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- if (lookup_basename) {
- pathname = strdup (loc->path);
- file = basename (pathname);
-
- inode = inode_search (ctx->itable, parent->ino, file);
- if (!inode) {
- libgf_client_loc_fill (&new_loc, ctx, 0, parent->ino,
- file);
-
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
-
- inode = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
- }
- }
-
- op_ret = 0;
-out:
- loc->inode = inode;
- loc->parent = parent;
-
- FREE (resolved);
- if (pathname) {
- FREE (pathname);
- }
-
- if (directory) {
- FREE (directory);
- }
-
- return op_ret;
-}
-
-
-/* resolves loc->path to loc->parent and loc->inode */
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- char *pathname = NULL;
- char *directory = NULL;
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- int32_t op_ret = 0;
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- parent = inode_from_path (ctx->itable, directory);
-
- if (parent != NULL) {
- loc->parent = parent;
-
- if (!lookup_basename) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved dirname(%s) to %"PRId64,
- loc->path, parent->ino);
- goto out;
- } else {
- inode = inode_from_path (ctx->itable, loc->path);
- if (inode != NULL) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved path(%s) to %"PRId64"/%"PRId64,
- loc->path, parent->ino, inode->ino);
- loc->inode = inode;
- goto out;
- }
- }
- }
-
- if (parent) {
- inode_unref (parent);
- } else if (inode) {
- inode_unref (inode);
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "undesired behaviour. inode(%"PRId64") for %s "
- "exists without parent (%s)",
- inode->ino, loc->path, directory);
- }
- op_ret = __do_path_resolve (loc, ctx, lookup_basename);
-out:
- if (pathname)
- free (pathname);
-
- return op_ret;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient-internals.h b/libglusterfsclient/src/libglusterfsclient-internals.h
deleted file mode 100755
index 7b62ce8ef..000000000
--- a/libglusterfsclient/src/libglusterfsclient-internals.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __LIBGLUSTERFSCLIENT_INTERNALS_H
-#define __LIBGLUSTERFSCLIENT_INTERNALS_H
-
-#include <glusterfs.h>
-#include <logging.h>
-#include <inode.h>
-#include <pthread.h>
-#include <stack.h>
-#include <list.h>
-#include <signal.h>
-#include <call-stub.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <fd.h>
-#include <dirent.h>
-
-#define LIBGF_IOBUF_SIZE (128 *GF_UNIT_KB)
-typedef void (*sighandler_t) (int);
-typedef struct list_head list_head_t;
-
-typedef struct libglusterfs_client_ctx {
- glusterfs_ctx_t gf_ctx;
- inode_table_t *itable;
- pthread_t reply_thread;
- call_pool_t pool;
- uint32_t counter;
- time_t lookup_timeout;
- time_t stat_timeout;
- /* We generate a fake fsid for the subvolume being
- * accessed through this context.
- */
- dev_t fake_fsid;
- pid_t pid;
-}libglusterfs_client_ctx_t;
-
-typedef struct signal_handler {
- int signo;
- sighandler_t handler;
- list_head_t next;
-}libgf_client_signal_handler_t ;
-
-typedef struct {
- pthread_mutex_t lock;
- pthread_cond_t reply_cond;
- call_stub_t *reply_stub;
- char complete;
- union {
- struct {
- char is_revalidate;
- loc_t *loc;
- int32_t size;
- } lookup;
- }fop;
- fd_t *fd; /* Needed here because we need a ref to the dir
- fd in the libgf_client_readdir_cbk in order
- to process the dirents received, without
- having them added to the reply stub.
- Also used in updating iattr cache. See
- readv_cbk for eg.
- */
-}libgf_client_local_t;
-
-typedef struct {
- pthread_cond_t init_con_established;
- pthread_mutex_t lock;
- char complete;
-}libglusterfs_client_private_t;
-
-typedef struct {
- pthread_mutex_t lock;
- uint32_t previous_lookup_time;
- uint32_t previous_stat_time;
- struct iatt stbuf;
-} libglusterfs_client_inode_ctx_t;
-
-/* Our dirent cache is very simplistic when it comes to directory
- * reading workloads. It assumes that all directory traversal operations happen
- * sequentially and that readdir callers dont go jumping around the directory
- * using seekdir, rewinddir. Thats why you'll notice that seekdir, rewinddir
- * API in libglusterfsclient only set the offset. The consequence is that when
- * libgf_dcache_readdir finds that the offset presented to it, is not
- * the same as the offset of the previous dirent returned by dcache (..stored
- * in struct direntcache->prev_off..), it realises that a non-sequential
- * directory read is in progress and returns 0 to signify that the cache is
- * not valid.
- * This could be made a bit more intelligent by using a data structure like
- * a hash-table or a balanced binary tree that allows us to search for the
- * existence of particular offsets in the cache without performing a list or
- * array traversal.
- * Dont use a simple binary search tree because
- * there is no guarantee that offsets in a sequential reading of the directory
- * will be just random integers. If for some reason they are sequential, a BST
- * will end up becoming a list.
- */
-struct direntcache {
- gf_dirent_t entries; /* Head of list of cached dirents. */
- gf_dirent_t *next; /* Pointer to the next entry that
- * should be sent by readdir */
- uint64_t prev_off; /* Offset where the next read will
- * happen.
- */
-};
-
-typedef struct {
- pthread_mutex_t lock;
- off_t offset;
- libglusterfs_client_ctx_t *ctx;
- /* `man readdir` says readdir is non-re-entrant
- * only if two readdirs are racing on the same
- * handle.
- */
- struct dirent dirp;
- struct direntcache *dcache;
- char vpath[PATH_MAX];
-} libglusterfs_client_fd_ctx_t;
-
-typedef struct libglusterfs_client_async_local {
- void *cbk_data;
- union {
- struct {
- fd_t *fd;
- glusterfs_readv_cbk_t cbk;
- char update_offset;
- }readv_cbk;
-
- struct {
- fd_t *fd;
- glusterfs_write_cbk_t cbk;
- }write_cbk;
-
- struct {
- fd_t *fd;
- }close_cbk;
-
- struct {
- void *buf;
- size_t size;
- loc_t *loc;
- char is_revalidate;
- glusterfs_get_cbk_t cbk;
- }lookup_cbk;
- }fop;
-}libglusterfs_client_async_local_t;
-
-#define LIBGF_STACK_WIND_AND_WAIT(frame, rfn, obj, fn, params ...) \
- do { \
- STACK_WIND (frame, rfn, obj, fn, params); \
- pthread_mutex_lock (&local->lock); \
- { \
- while (!local->complete) { \
- pthread_cond_wait (&local->reply_cond, \
- &local->lock); \
- } \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-#define LIBGF_CLIENT_SIGNAL(signal_handler_list, signo, handler) \
- do { \
- libgf_client_signal_handler_t *libgf_handler = CALLOC (1, \
- sizeof (*libgf_handler)); \
- ERR_ABORT (libgf_handler); \
- libgf_handler->signo = signo; \
- libgf_handler->handler = signal (signo, handler); \
- list_add (&libgf_handler->next, signal_handler_list); \
- } while (0)
-
-#define LIBGF_INSTALL_SIGNAL_HANDLERS(signal_handlers) \
- do { \
- INIT_LIST_HEAD (&signal_handlers); \
- /* Handle SIGABORT and SIGSEGV */ \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGSEGV, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGABRT, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGHUP, gf_log_logrotate); \
- /* LIBGF_CLIENT_SIGNAL (SIGTERM, glusterfs_cleanup_and_exit); */ \
- } while (0)
-
-#define LIBGF_RESTORE_SIGNAL_HANDLERS(local) \
- do { \
- libgf_client_signal_handler_t *ptr = NULL, *tmp = NULL; \
- list_for_each_entry_safe (ptr, tmp, &local->signal_handlers,\
- next) { \
- signal (ptr->signo, ptr->handler); \
- FREE (ptr); \
- } \
- } while (0)
-
-#define LIBGF_CLIENT_FOP_ASYNC(ctx, local, ret_fn, op, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- frame->root->state = ctx; \
- frame->local = local; \
- STACK_WIND (frame, ret_fn, xl, xl->fops->op, args); \
- } while (0)
-
-#define LIBGF_CLIENT_FOP(ctx, stub, op, local, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- if (!local) { \
- local = CALLOC (1, sizeof (*local)); \
- } \
- ERR_ABORT (local); \
- frame->local = local; \
- frame->root->state = ctx; \
- pthread_cond_init (&local->reply_cond, NULL); \
- pthread_mutex_init (&local->lock, NULL); \
- LIBGF_STACK_WIND_AND_WAIT (frame, libgf_client_##op##_cbk, xl, \
- xl->fops->op, args); \
- stub = local->reply_stub; \
- FREE (frame->local); \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- } while (0)
-
-#define LIBGF_REPLY_NOTIFY(local) \
- do { \
- pthread_mutex_lock (&local->lock); \
- { \
- local->complete = 1; \
- pthread_cond_broadcast (&local->reply_cond); \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-void
-libgf_client_loc_wipe (loc_t *loc);
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name);
-
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename);
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf,
- dict_t **dict,
- dict_t *xattr_req);
-
-/* We're not expecting more than 10-15
- * VMPs per process so a list is acceptable.
- */
-struct vmp_entry {
- struct list_head list;
- char * vmp;
- int vmplen;
- glusterfs_handle_t handle;
-};
-
-#define LIBGF_UPDATE_LOOKUP 0x1
-#define LIBGF_UPDATE_STAT 0x2
-#define LIBGF_UPDATE_ALL (LIBGF_UPDATE_LOOKUP | LIBGF_UPDATE_STAT)
-
-#define LIBGF_VALIDATE_LOOKUP 0x1
-#define LIBGF_VALIDATE_STAT 0x2
-
-#define LIBGF_INVALIDATE_LOOKUP 0x1
-#define LIBGF_INVALIDATE_STAT 0x2
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct iatt *sbuf, int flags);
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct iatt *buf);
-
-#endif
diff --git a/libglusterfsclient/src/libglusterfsclient.c b/libglusterfsclient/src/libglusterfsclient.c
deleted file mode 100755
index aaff49416..000000000
--- a/libglusterfsclient/src/libglusterfsclient.c
+++ /dev/null
@@ -1,8160 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <errno.h>
-#include <libgen.h>
-#include <stddef.h>
-
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#ifdef GF_SOLARIS_HOST_OS
-#include <sys/statfs.h>
-#endif
-#include <unistd.h>
-#include <xlator.h>
-#include <timer.h>
-#include "defaults.h"
-#include <time.h>
-#include <poll.h>
-#include "transport.h"
-#include "event.h"
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include "compat.h"
-#include "compat-errno.h"
-#ifndef GF_SOLARIS_HOST_OS
-#include <sys/vfs.h>
-#endif
-#include <utime.h>
-#include <sys/param.h>
-#include <list.h>
-#include <stdarg.h>
-#include <sys/statvfs.h>
-#include "hashfn.h"
-#include <sys/select.h>
-
-#define LIBGF_XL_NAME "libglusterfsclient"
-#define LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT 1000 //14057
-#define LIBGF_SENDFILE_BLOCK_SIZE 4096
-#define LIBGF_READDIR_BLOCK 4096
-#define libgf_path_absolute(path) ((path)[0] == '/')
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph);
-int32_t libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- char *buf, size_t bufsize);
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc);
-static int first_init = 1;
-
-/* The global list of virtual mount points */
-struct {
- struct list_head list;
- int entries;
-}vmplist;
-
-
-/* Protects the VMP list above. */
-pthread_mutex_t vmplock = PTHREAD_MUTEX_INITIALIZER;
-
-/* Ensures only one thread is ever calling glusterfs_mount.
- * Since that function internally calls routines which
- * use the yacc parser code using global vars, this process
- * needs to be syncronised.
- */
-pthread_mutex_t mountlock = PTHREAD_MUTEX_INITIALIZER;
-
-static char cwd[PATH_MAX];
-static char cwd_inited = 0;
-static pthread_mutex_t cwdlock = PTHREAD_MUTEX_INITIALIZER;
-
-char *
-libgf_vmp_virtual_path (struct vmp_entry *entry, const char *path, char *vpath)
-{
- char *tmp = NULL;
-
- tmp = ((char *)(path + (entry->vmplen-1)));
- if (strlen (tmp) > 0) {
- if (tmp[0] != '/') {
- vpath[0] = '/';
- vpath[1] = '\0';
- strcat (&vpath[1], tmp);
- } else
- strcpy (vpath, tmp);
- } else {
- vpath[0] = '/';
- vpath[1] = '\0';
- }
-
- return vpath;
-}
-
-char *
-zr_build_process_uuid ()
-{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- struct tm now = {0, };
- char now_str[32];
-
- if (-1 == gettimeofday(&tv, NULL)) {
- gf_log ("", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (-1 == gethostname (hostname, 256)) {
- gf_log ("", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- localtime_r (&tv.tv_sec, &now);
- strftime (now_str, 32, "%Y/%m/%d-%H:%M:%S", &now);
- snprintf (tmp_str, 1024, "%s-%d-%s:%ld",
- hostname, getpid(), now_str, tv.tv_usec);
-
- return strdup (tmp_str);
-}
-
-
-int32_t
-libgf_client_forget (xlator_t *this,
- inode_t *inode)
-{
- uint64_t ptr = 0;
- libglusterfs_client_inode_ctx_t *ctx = NULL;
-
- inode_ctx_del (inode, this, &ptr);
- ctx = (libglusterfs_client_inode_ctx_t *)(long) ptr;
-
- FREE (ctx);
-
- return 0;
-}
-
-xlator_t *
-libgf_inode_to_xlator (inode_t *inode)
-{
- if (!inode)
- return NULL;
-
- if (!inode->table)
- return NULL;
-
- if (!inode->table->xl)
- return NULL;
-
- if (!inode->table->xl->ctx)
- return NULL;
-
- return inode->table->xl->ctx->top;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_get_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_get (fd, libgf_inode_to_xlator (fd->inode), &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_alloc_fd_ctx (libglusterfs_client_ctx_t *ctx, fd_t *fd, char *vpath)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- uint64_t ctxaddr = 0;
-
- fdctx = CALLOC (1, sizeof (*fdctx));
- if (fdctx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- fdctx = NULL;
- goto out;
- }
-
- pthread_mutex_init (&fdctx->lock, NULL);
- fdctx->ctx = ctx;
- ctxaddr = (uint64_t) (long)fdctx;
-
- if (fd->inode) {
- if (IA_ISDIR (fd->inode->ia_type)) {
- fdctx->dcache = CALLOC (1, sizeof (struct direntcache));
- if (fdctx->dcache)
- INIT_LIST_HEAD (&fdctx->dcache->entries.list);
- /* If the calloc fails, we can still continue
- * working as the dcache is not required for correct
- * operation.
- */
- }
- }
-
- if (vpath != NULL) {
- strcpy (fdctx->vpath, vpath);
- if (vpath[strlen(vpath) - 1] != '/') {
- strcat (fdctx->vpath, "/");
- }
- }
-
- fd_ctx_set (fd, libgf_inode_to_xlator (fd->inode), ctxaddr);
-out:
- return fdctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_del_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_del (fd, libgf_inode_to_xlator (fd->inode) , &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-void
-libgf_dcache_invalidate (fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd)
- return;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- return;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- return;
- }
-
- if (!list_empty (&fd_ctx->dcache->entries.list))
- gf_dirent_free (&fd_ctx->dcache->entries);
-
- INIT_LIST_HEAD (&fd_ctx->dcache->entries.list);
-
- fd_ctx->dcache->next = NULL;
- fd_ctx->dcache->prev_off = 0;
-
- return;
-}
-
-/* The first entry in the entries is always a placeholder
- * or the list head. The real entries begin from entries->next.
- */
-int
-libgf_dcache_update (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- gf_dirent_t *entries)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int op_ret = -1;
-
- if ((!ctx) || (!fd) || (!entries)) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- /* dcache is not enabled. */
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- op_ret = 0;
- goto out;
- }
-
- /* If we're updating, we must begin with invalidating any previous
- * entries.
- */
- libgf_dcache_invalidate (fd);
-
- fd_ctx->dcache->next = entries->next;
- /* We still need to store a pointer to the head
- * so we start free'ing from the head when invalidation
- * is required.
- *
- * Need to delink the entries from the list
- * given to us by an underlying translators. Most translators will
- * free this list after this call so we must preserve the dirents in
- * order to cache them.
- */
- list_splice_init (&entries->list, &fd_ctx->dcache->entries.list);
- op_ret = 0;
-out:
- return op_ret;
-}
-
-int
-libgf_dcache_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int cachevalid = 0;
-
- if ((!ctx) || (!fd) || (!dirp) || (!offset))
- return 0;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- goto out;
- }
-
- /* We've either run out of entries in the cache
- * or the cache is empty.
- */
- if (!fd_ctx->dcache->next) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "No entries present");
- goto out;
- }
-
- /* The dirent list is created as a circular linked list
- * so this check is needed to ensure, we dont start
- * reading old entries again.
- * If we're reached this situation, the cache is exhausted
- * and we'll need to pre-fetch more entries to continue serving.
- */
- if (fd_ctx->dcache->next == &fd_ctx->dcache->entries) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Entries exhausted");
- goto out;
- }
-
- /* During sequential reading we generally expect that the offset
- * requested is the same as the offset we served in the previous call
- * to readdir. But, seekdir, rewinddir and libgf_dcache_invalidate
- * require special handling because seekdir/rewinddir change the offset
- * in the fd_ctx and libgf_dcache_invalidate changes the prev_off.
- */
- if (*offset != fd_ctx->dcache->prev_off) {
- /* For all cases of the if branch above, we know that the
- * cache is now invalid except for the case below. It handles
- * the case where the two offset values above are different
- * but different because the previous readdir block was
- * exhausted, resulting in a prev_off being set to 0 in
- * libgf_dcache_invalidate, while the requested offset is non
- * zero because that is what we returned for the last dirent
- * of the previous readdir block.
- */
- if ((*offset != 0) && (fd_ctx->dcache->prev_off == 0)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Entries"
- " exhausted");
- cachevalid = 1;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Dcache"
- " invalidated previously");
- } else
- cachevalid = 1;
-
- if (!cachevalid)
- goto out;
-
- dirp->d_ino = fd_ctx->dcache->next->d_ino;
- strncpy (dirp->d_name, fd_ctx->dcache->next->d_name,
- fd_ctx->dcache->next->d_len);
-
- *offset = fd_ctx->dcache->next->d_off;
- dirp->d_off = *offset;
- fd_ctx->dcache->prev_off = fd_ctx->dcache->next->d_off;
- fd_ctx->dcache->next = fd_ctx->dcache->next->next;
-
-out:
- return cachevalid;
-}
-
-
-int32_t
-libgf_client_release (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (IA_ISDIR (fd->inode->ia_type)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_get_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_get (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_del_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_del (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_alloc_inode_ctx (libglusterfs_client_ctx_t *ctx, inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- ictx = CALLOC (1, sizeof (*ictx));
- if (ictx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- goto out;
- }
-
- pthread_mutex_init (&ictx->lock, NULL);
- ctxaddr = (uint64_t) (long)ictx;
- if (inode_ctx_put (inode, libgf_inode_to_xlator (inode), ctxaddr) < 0){
- FREE (ictx);
- ictx = NULL;
- }
-
-out:
- return ictx;
-}
-
-int
-libgf_transform_iattr (libglusterfs_client_ctx_t *libctx, inode_t *inode,
- struct iatt *buf)
-{
-
- if ((!libctx) || (!buf) || (!inode))
- return -1;
-
- buf->ia_dev = libctx->fake_fsid;
- /* If the inode is root, the inode number must be 1 not the
- * ino received from the file system.
- */
- if ((inode->ino == 1) && (buf))
- buf->ia_ino = 1;
-
- return 0;
-}
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct iatt *buf)
-{
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- time_t current = 0;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- /* Take a timestamp only after we've acquired the
- * lock.
- */
- current = time (NULL);
- if (flags & LIBGF_UPDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Updating lookup");
- inode_ctx->previous_lookup_time = current;
- }
-
- if (flags & LIBGF_UPDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Updating stat");
-
- /* Update the cached stat struct only if a new
- * stat buf is given.
- */
- if (buf != NULL) {
- inode_ctx->previous_stat_time = current;
- memcpy (&inode_ctx->stbuf, buf,
- sizeof (inode_ctx->stbuf));
- }
- }
- }
- pthread_mutex_unlock (&inode_ctx->lock);
- op_ret = 0;
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_invalidate_iattr_cache (inode_t *inode, int flags)
-{
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- if (!inode)
- return -1;
-
- ictx = libgf_get_inode_ctx (inode);
- if (!ictx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- return -1;
- }
-
- pthread_mutex_lock (&ictx->lock);
- {
- if (flags & LIBGF_INVALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Invalidating"
- " lookup");
- ictx->previous_lookup_time = 0;
- }
-
- if (flags & LIBGF_INVALIDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Invalidating"
- " stat");
- ictx->previous_stat_time = 0;
- }
-
- }
- pthread_mutex_unlock (&ictx->lock);
-
- return 0;
-}
-
-
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct iatt *sbuf, int flags)
-{
- time_t current = 0;
- time_t prev = 0;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- int cache_valid = 0;
- time_t timeout = 0;
-
- if (inode == NULL)
- return 0;
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present\n");
- return 0;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- current = time (NULL);
- if (flags & LIBGF_VALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Checking lookup");
- prev = inode_ctx->previous_lookup_time;
- timeout = ctx->lookup_timeout;
- } else {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Checking stat");
- prev = inode_ctx->previous_stat_time;
- timeout = ctx->stat_timeout;
- }
-
- /* Even if the timeout is set to -1 to cache
- * infinitely, fops like write must invalidate the
- * stat cache because writev_cbk cannot update
- * the cache using the stat returned to it. This is
- * because write-behind can return a stat bufs filled
- * with zeroes.
- */
- if (prev == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache Invalid");
- goto iattr_unlock_out;
- }
-
- /* Cache infinitely */
- if (timeout == (time_t)-1) {
- cache_valid = 1;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Caching On and "
- "valid");
- goto iattr_unlock_out;
- }
-
- /* Disable caching completely */
- if (timeout == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache disabled");
- goto iattr_unlock_out;
- }
-
- if ((prev > 0) && (timeout >= (current - prev))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache valid");
- cache_valid = 1;
- }
-
- if (flags & LIBGF_VALIDATE_LOOKUP)
- goto iattr_unlock_out;
-
- if ((cache_valid) && (sbuf))
- *sbuf = inode_ctx->stbuf;
- }
-iattr_unlock_out:
- pthread_mutex_unlock (&inode_ctx->lock);
-
- return cache_valid;
-}
-
-int32_t
-libgf_client_releasedir (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (IA_ISDIR (fd->inode->ia_type)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-void *poll_proc (void *ptr)
-{
- glusterfs_ctx_t *ctx = ptr;
-
- event_dispatch (ctx->event_pool);
-
- return NULL;
-}
-
-
-int32_t
-xlator_graph_init (xlator_t *xl)
-{
- xlator_t *trav = xl;
- int32_t ret = -1;
-
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->ready) {
- ret = xlator_tree_init (trav);
- if (ret < 0)
- break;
- }
- trav = trav->next;
- }
-
- return ret;
-}
-
-
-void
-xlator_graph_fini (xlator_t *xl)
-{
- xlator_t *trav = xl;
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->init_succeeded) {
- break;
- }
-
- xlator_tree_fini (trav);
- trav = trav->next;
- }
-}
-
-/* Returns a pointer to the @n'th char matching
- * @c in string @str, starting the search from right or
- * end-of-string, rather than starting from left, as rindex
- * function does.
- */
-char *
-libgf_rrindex (char *str, int c, int n)
-{
- int len = 0;
- int occurrence = 0;
-
- if (str == NULL)
- return NULL;
-
- len = strlen (str);
- /* Point to last character of string. */
- str += (len - 1);
- while (len > 0) {
- if ((int)*str == c) {
- ++occurrence;
- if (occurrence == n)
- break;
- }
- --len;
- --str;
- }
-
- return str;
-}
-
-char *
-libgf_trim_to_prev_dir (char * path)
-{
- char *idx = NULL;
- int len = 0;
-
- if (!path)
- return NULL;
-
- /* Check if we're already at root, if yes
- * then there is no prev dir.
- */
- len = strlen (path);
- if (len == 1)
- return path;
-
- if (path[len - 1] == '/') {
- path[len - 1] = '\0';
- }
-
- idx = libgf_rrindex (path, '/', 1);
- /* Move to the char after the / */
- ++idx;
- *idx = '\0';
-
- return path;
-}
-
-
-char *
-libgf_prepend_cwd (const char *userpath, char *abspath, int size)
-{
- if ((!userpath) || (!abspath))
- return NULL;
-
- if (!getcwd (abspath, size))
- return NULL;
-
- strcat (abspath, "/");
- strcat (abspath, userpath);
-
- return abspath;
-}
-
-
-/* Performs a lightweight path resolution that only
- * looks for . and .. and replaces those with the
- * proper names.
- *
- * FIXME: This is a stop-gap measure till we have full
- * fledge path resolution going in here.
- * Function returns path strdup'ed so remember to FREE the
- * string as required.
- */
-char *
-libgf_resolve_path_light (char *path)
-{
- char *respath = NULL;
- char *saveptr = NULL;
- char *tok = NULL;
- int len = 0;
- int addslash = 0;
- char mypath[PATH_MAX];
-
- if (!path)
- goto out;
-
- memset (mypath, 0, PATH_MAX);
-
- if (!libgf_path_absolute (path))
- libgf_prepend_cwd (path, mypath, PATH_MAX);
- else
- strcpy (mypath, path);
-
- len = strlen (mypath);
- if (len == 0) {
- goto out;
- }
-
- respath = calloc (PATH_MAX, sizeof (char));
- if (respath == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- goto out;
- }
-
- /* The path only contains a / or a //, so simply add a /
- * and return.
- * This needs special handling because the loop below does
- * not allow us to do so through strtok.
- */
- if (((mypath[0] == '/') && (len == 1))
- || (strcmp (mypath, "//") == 0)) {
- strcat (respath, "/");
- goto out;
- }
-
- tok = strtok_r (mypath, "/", &saveptr);
- addslash = 0;
- strcat (respath, "/");
- while (tok) {
- if (addslash) {
- if ((strcmp (tok, ".") != 0)
- && (strcmp (tok, "..") != 0)) {
- strcat (respath, "/");
- }
- }
-
- if ((strcmp (tok, ".") != 0) && (strcmp (tok, "..") != 0)) {
- strcat (respath, tok);
- addslash = 1;
- } else if ((strcmp (tok, "..") == 0)) {
- libgf_trim_to_prev_dir (respath);
- addslash = 0;
- }
-
- tok = strtok_r (NULL, "/", &saveptr);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Path: %s, Resolved Path: %s",
- path, respath);
-out:
- return respath;
-}
-
-void
-libgf_client_loc_wipe (loc_t *loc)
-{
- if (loc->path) {
- FREE (loc->path);
- }
-
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
-
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
-
- loc->path = loc->name = NULL;
- loc->ino = 0;
-}
-
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name)
-{
- inode_t *inode = NULL, *parent = NULL;
- int32_t ret = -1;
- char *path = NULL;
-
- /* resistance against multiple invocation of loc_fill not to get
- reference leaks via inode_search() */
-
- inode = loc->inode;
-
- if (!inode) {
- if (ino)
- inode = inode_search (ctx->itable, ino, NULL);
-
- if (inode)
- goto inode_found;
-
- if (par && name)
- inode = inode_search (ctx->itable, par, name);
- }
-
-inode_found:
- if (inode) {
- loc->ino = inode->ino;
- loc->inode = inode;
- }
-
- parent = loc->parent;
- if (!parent) {
- if (inode)
- parent = inode_parent (inode, par, name);
- else
- parent = inode_search (ctx->itable, par, NULL);
- loc->parent = parent;
- }
-
- if (!loc->path) {
- if (name && parent) {
- ret = inode_path (parent, name, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64"/%s",
- parent->ino, name);
- goto fail;
- } else {
- loc->path = path;
- }
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64,
- inode->ino);
- goto fail;
- } else {
- loc->path = path;
- }
- }
- }
-
- if (loc->path) {
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else loc->name = "";
- }
-
- if ((ino != 1) &&
- (parent == NULL)) {
- gf_log ("fuse-bridge", GF_LOG_ERROR,
- "failed to search parent for %"PRId64"/%s (%"PRId64")",
- (ino_t)par, name, (ino_t)ino);
- ret = -1;
- goto fail;
- }
- ret = 0;
-fail:
- return ret;
-}
-
-
-static call_frame_t *
-get_call_frame_for_req (libglusterfs_client_ctx_t *ctx, char d)
-{
- call_pool_t *pool = ctx->gf_ctx.pool;
- xlator_t *this = ctx->gf_ctx.graph;
- call_frame_t *frame = NULL;
-
-
- frame = create_frame (this, pool);
-
- frame->root->uid = geteuid ();
- frame->root->gid = getegid ();
- frame->root->pid = ctx->pid;
- frame->root->unique = ctx->counter++;
-
- return frame;
-}
-
-void
-libgf_client_fini (xlator_t *this)
-{
- FREE (this->private);
- return;
-}
-
-
-int32_t
-libgf_client_notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- libglusterfs_client_private_t *priv = this->private;
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- pthread_mutex_lock (&priv->lock);
- {
- priv->complete = 1;
- pthread_cond_broadcast (&priv->init_con_established);
- }
- pthread_mutex_unlock (&priv->lock);
- break;
-
- default:
- default_notify (this, event, data);
- }
-
- return 0;
-}
-
-int32_t
-libgf_client_init (xlator_t *this)
-{
- return 0;
-}
-
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *init_ctx, uint32_t fakefsid)
-{
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_private_t *priv = NULL;
- FILE *specfp = NULL;
- xlator_t *graph = NULL, *trav = NULL;
- call_pool_t *pool = NULL;
- int32_t ret = 0;
- struct rlimit lim;
- uint32_t xl_count = 0;
- loc_t new_loc = {0, };
- struct timeval tv = {0, };
- uint32_t len = 0;
- char buf[PATH_MAX];
-
- if (!init_ctx || (!init_ctx->specfile && !init_ctx->specfp)) {
- errno = EINVAL;
- return NULL;
- }
-
- ctx = CALLOC (1, sizeof (*ctx));
- if (!ctx) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: out of memory\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
-
- errno = ENOMEM;
- return NULL;
- }
-
- ctx->lookup_timeout = init_ctx->lookup_timeout;
- ctx->stat_timeout = init_ctx->stat_timeout;
- ctx->fake_fsid = fakefsid;
- ctx->pid = getpid ();
- pthread_mutex_init (&ctx->gf_ctx.lock, NULL);
-
- pool = ctx->gf_ctx.pool = CALLOC (1, sizeof (call_pool_t));
- if (!pool) {
- errno = ENOMEM;
- FREE (ctx);
- return NULL;
- }
-
- LOCK_INIT (&pool->lock);
- INIT_LIST_HEAD (&pool->all_frames);
-
- /* FIXME: why is count hardcoded to 16384 */
- ctx->gf_ctx.event_pool = event_pool_new (16384);
- ctx->gf_ctx.page_size = LIBGF_IOBUF_SIZE;
- ctx->gf_ctx.iobuf_pool = iobuf_pool_new (8 * 1048576,
- ctx->gf_ctx.page_size);
-
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
- setrlimit (RLIMIT_NOFILE, &lim);
-
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
-
- if (init_ctx->logfile)
- ctx->gf_ctx.cmd_args.log_file = strdup (init_ctx->logfile);
- else
- ctx->gf_ctx.cmd_args.log_file = strdup ("/dev/stderr");
-
- if (init_ctx->loglevel) {
- if (!strncasecmp (init_ctx->loglevel, "DEBUG",
- strlen ("DEBUG"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_DEBUG;
- } else if (!strncasecmp (init_ctx->loglevel, "WARNING",
- strlen ("WARNING"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
- } else if (!strncasecmp (init_ctx->loglevel, "CRITICAL",
- strlen ("CRITICAL"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_CRITICAL;
- } else if (!strncasecmp (init_ctx->loglevel, "NONE",
- strlen ("NONE"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_NONE;
- } else if (!strncasecmp (init_ctx->loglevel, "ERROR",
- strlen ("ERROR"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_ERROR;
- } else if (!strncasecmp (init_ctx->loglevel, "TRACE",
- strlen ("TRACE"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_TRACE;
- } else {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: Unrecognized log-level \"%s\", possible values are \"DEBUG|WARNING|[ERROR]|CRITICAL|NONE|TRACE\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- init_ctx->loglevel);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- errno = EINVAL;
- return NULL;
- }
- }
-
- if (first_init)
- {
- memset (buf, 0, PATH_MAX);
-
- if (getcwd (buf, PATH_MAX) == NULL) {
- fprintf (stderr, "libglusterfsclient: cannot get "
- "current working directory (%s)",
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- len = strlen (buf);
- if ((buf[len - 1] != '/')) {
- if ((len + 2) > PATH_MAX) {
- errno = ENAMETOOLONG;
- fprintf (stderr, "libglusterfsclient: cannot"
- "get current working directory (%s)",
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- strcat (buf, "/");
- }
-
- pthread_mutex_lock (&cwdlock);
- {
- strcpy (cwd, buf);
- cwd_inited = 1;
- }
- pthread_mutex_unlock (&cwdlock);
-
- ret = gf_log_init (ctx->gf_ctx.cmd_args.log_file);
- if (ret == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: failed to open logfile \"%s\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- gf_log_set_loglevel (ctx->gf_ctx.cmd_args.log_level);
- }
-
- if (init_ctx->specfp) {
- specfp = init_ctx->specfp;
- if (fseek (specfp, 0L, SEEK_SET)) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: fseek on volume file stream failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- } else if (init_ctx->specfile) {
- specfp = fopen (init_ctx->specfile, "r");
- ctx->gf_ctx.cmd_args.volume_file = strdup (init_ctx->specfile);
- }
-
- if (!specfp) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: could not open volfile: %s\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- ctx->gf_ctx.cmd_args.volume_name = strdup (init_ctx->volume_name);
- }
-
- graph = file_to_xlator_tree (&ctx->gf_ctx, specfp);
- if (!graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create configuration graph (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- trav = graph;
- while (trav) {
- if (strcmp (trav->name, init_ctx->volume_name) == 0) {
- graph = trav;
- break;
- }
- trav = trav->next;
- }
- }
-
- ctx->gf_ctx.graph = libglusterfs_graph (graph);
- if (!ctx->gf_ctx.graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph creation failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- graph = ctx->gf_ctx.graph;
- ctx->gf_ctx.top = graph;
-
- trav = graph;
- while (trav) {
- xl_count++; /* Getting this value right is very important */
- trav = trav->next;
- }
-
- ctx->gf_ctx.xl_count = xl_count + 1;
-
- priv = CALLOC (1, sizeof (*priv));
- if (!priv) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot allocate memory (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- pthread_cond_init (&priv->init_con_established, NULL);
- pthread_mutex_init (&priv->lock, NULL);
-
- graph->private = priv;
- ctx->itable = inode_table_new (LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT,
- graph);
- if (!ctx->itable) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create inode table\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- xlator_tree_free (graph);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- set_global_ctx_ptr (&ctx->gf_ctx);
- ctx->gf_ctx.process_uuid = zr_build_process_uuid ();
-
- if (xlator_graph_init (graph) == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph initialization failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- /* Send notify to all translator saying things are ready */
- graph->notify (graph, GF_EVENT_PARENT_UP, graph);
-
- if (gf_timer_registry_init (&ctx->gf_ctx) == NULL) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: timer init failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- if ((ret = pthread_create (&ctx->reply_thread, NULL, poll_proc,
- (void *)&ctx->gf_ctx))) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: reply thread creation failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- while (!priv->complete) {
- pthread_cond_wait (&priv->init_con_established,
- &priv->lock);
- }
- }
- pthread_mutex_unlock (&priv->lock);
-
- /*
- * wait for some time to allow initialization of all children of
- * distribute before sending lookup on '/'
- */
-
- tv.tv_sec = 0;
- tv.tv_usec = (100 * 1000);
- select (0, NULL, NULL, NULL, &tv);
-
- /* workaround for xlators like dht which require lookup to be sent
- * on / */
- libgf_client_loc_fill (&new_loc, ctx, 1, 0, "/");
- ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL, NULL);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR, "lookup of /"
- " failed");
- return NULL;
- }
- libgf_client_loc_wipe (&new_loc);
-
- first_init = 0;
-
- return ctx;
-}
-
-struct vmp_entry *
-libgf_init_vmpentry (char *vmp, glusterfs_handle_t *vmphandle)
-{
- struct vmp_entry *entry = NULL;
- int vmplen = 0;
- int appendslash = 0;
- int ret = -1;
-
- entry = CALLOC (1, sizeof (struct vmp_entry));
- if (!entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- return NULL;
- }
-
- vmplen = strlen (vmp);
- if (vmp[vmplen - 1] != '/') {
- vmplen++;
- appendslash = 1;
- }
-
- entry->vmp = CALLOC (vmplen + 1, sizeof (char));
- if (!entry->vmp) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation "
- "failed");
- goto free_entry;
- }
-
- strcpy (entry->vmp, vmp);
- if (appendslash) {
- entry->vmp[vmplen-1] = '/';
- entry->vmp[vmplen] = '\0';
- }
-
- entry->vmplen = vmplen;
- entry->handle = vmphandle;
- INIT_LIST_HEAD (&entry->list);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New VMP entry: %s", vmp);
-
- ret = 0;
-
-free_entry:
- if (ret == -1) {
- if (entry->vmp)
- FREE (entry->vmp);
- if (entry)
- FREE (entry);
- entry = NULL;
- }
- return entry;
-}
-
-void
-libgf_free_vmp_entry (struct vmp_entry *entry)
-{
- FREE (entry->vmp);
- FREE (entry);
-}
-
-int
-libgf_count_path_components (char *path)
-{
- int compos = 0;
- char *pathdup = NULL;
- int len = 0;
-
- if (!path)
- return -1;
-
- pathdup = strdup (path);
- if (!pathdup)
- return -1;
-
- len = strlen (pathdup);
- if (pathdup[len - 1] == '/')
- pathdup[len - 1] = '\0';
-
- path = pathdup;
- while ((path = strchr (path, '/'))) {
- compos++;
- ++path;
- }
-
- free (pathdup);
- return compos;
-}
-
-/* Returns the number of components that match between
- * the VMP and the path. Assumes string1 is vmp entry.
- * Assumes both are absolute paths.
- */
-int
-libgf_strmatchcount (char *string1, char *string2)
-{
- int matchcount = 0;
- char *s1dup = NULL, *s2dup = NULL;
- char *tok1 = NULL, *saveptr1 = NULL;
- char *tok2 = NULL, *saveptr2 = NULL;
-
- if ((!string1) || (!string2))
- return 0;
-
- s1dup = strdup (string1);
- if (!s1dup)
- return 0;
-
- s2dup = strdup (string2);
- if (!s2dup)
- goto free_s1;
-
- string1 = s1dup;
- string2 = s2dup;
-
- tok1 = strtok_r(string1, "/", &saveptr1);
- tok2 = strtok_r (string2, "/", &saveptr2);
- while (tok1) {
- if (!tok2)
- break;
-
- if (strcmp (tok1, tok2) != 0)
- break;
-
- matchcount++;
- tok1 = strtok_r(NULL, "/", &saveptr1);
- tok2 = strtok_r (NULL, "/", &saveptr2);
- }
-
- free (s2dup);
-free_s1:
- free (s1dup);
- return matchcount;
-}
-
-int
-libgf_vmp_entry_match (struct vmp_entry *entry, char *path)
-{
- return libgf_strmatchcount (entry->vmp, path);
-}
-
-#define LIBGF_VMP_EXACT 1
-#define LIBGF_VMP_LONGESTPREFIX 0
-
-
-/* copies vmp from the vmp-entry having glusterfs handle @handle, into @vmp */
-char *
-libgf_vmp_search_vmp (glusterfs_handle_t handle, char *vmp, size_t vmp_size)
-{
- char *res = NULL;
- struct vmp_entry *entry = NULL;
-
- if (handle == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Virtual Mount Point "
- "list is empty.");
- goto unlock;
- }
-
- list_for_each_entry(entry, &vmplist.list, list) {
- if (entry->handle == handle) {
- if ((vmp_size) < (strlen (entry->vmp) + 1)) {
- errno = ENAMETOOLONG;
- goto unlock;
- }
-
- strcpy (vmp, entry->vmp);
- res = vmp;
- break;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&vmplock);
-
-out:
- return res;
-}
-
-
-struct vmp_entry *
-_libgf_vmp_search_entry (char *path, int searchtype)
-{
- struct vmp_entry *entry = NULL;
- int matchcount = 0;
- struct vmp_entry *maxentry = NULL;
- int maxcount = 0;
- int vmpcompcount = 0;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Search: path %s, type: %s",
- path, (searchtype == LIBGF_VMP_EXACT)?"Exact":"LongestPrefix");
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Virtual Mount Point "
- "list is empty.");
- goto out;
- }
-
- list_for_each_entry(entry, &vmplist.list, list) {
- vmpcompcount = libgf_count_path_components (entry->vmp);
- matchcount = libgf_vmp_entry_match (entry, path);
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Candidate VMP: %s,"
- " Matchcount: %d", entry->vmp, matchcount);
- if ((matchcount > maxcount) && (matchcount == vmpcompcount)) {
- maxcount = matchcount;
- maxentry = entry;
- }
- }
-
- /* To ensure that the longest prefix matched entry is also an exact
- * match, this is used to check whether duplicate entries are present
- * in the vmplist.
- */
- vmpcompcount = 0;
- if ((searchtype == LIBGF_VMP_EXACT) && (maxentry)) {
- vmpcompcount = libgf_count_path_components (maxentry->vmp);
- matchcount = libgf_count_path_components (path);
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Exact Check: VMP: %s,"
- " CompCount: %d, Path: %s, CompCount: %d",
- maxentry->vmp, vmpcompcount, path, matchcount);
- if (vmpcompcount != matchcount) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "No Match");
- maxentry = NULL;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Matches!");
- }
-
-out:
- return maxentry;
-}
-
-/* Used to search for a exactly matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_exact_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_EXACT);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-
-/* Used to search for a longest prefix matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_LONGESTPREFIX);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-int
-libgf_vmp_map_ghandle (char *vmp, glusterfs_handle_t *vmphandle)
-{
- int ret = -1;
- struct vmp_entry *vmpentry = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New Entry: %s", vmp);
- vmpentry = libgf_init_vmpentry (vmp, vmphandle);
- if (!vmpentry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to create VMP"
- " entry");
- goto out;
- }
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Empty list");
- INIT_LIST_HEAD (&vmplist.list);
- }
-
- list_add_tail (&vmpentry->list, &vmplist.list);
- ++vmplist.entries;
- }
- pthread_mutex_unlock (&vmplock);
- ret = 0;
-
-out:
- return ret;
-}
-
-/* Path must be validated already. */
-glusterfs_handle_t
-libgf_vmp_get_ghandle (char * path)
-{
- struct vmp_entry *entry = NULL;
-
- entry = libgf_vmp_search_entry (path);
-
- if (entry == NULL)
- return NULL;
-
- return entry->handle;
-}
-
-
-/* Returns the handle for the path given in @path,
- * @path can be a relative path. The point is, here we
- * perform any path resolution that is needed and then
- * search for the corresponding vmp handle.
- * @vpath is a result-value argument in that the virtual
- * path inside the handle is copied into it.
- */
-glusterfs_handle_t
-libgf_resolved_path_handle (const char *path, char *vpath)
-{
- char *respath = NULL;
- struct vmp_entry *entry = NULL;
- glusterfs_handle_t handle = NULL;
- char *tmp = NULL;
-
- if ((!path) || (!vpath))
- return NULL;
-
- /* We only want compaction before VMP entry search because the
- * VMP cannot be search unless we have an absolute path.
- * For absolute paths, we search for VMP first, then perform the
- * path compaction on the given virtual path.
- */
- if (!libgf_path_absolute (path)) {
- respath = libgf_resolve_path_light ((char *)path);
- if (respath == NULL)
- return NULL;
- }
-
- /* This condition is needed because in case of absolute paths, the path
- * would already include the VMP and we want to ensure that any path
- * compaction that happens does not exclude the VMP. In the absence of
- * this condition an absolute path might get compacted to "/", i.e.
- * exclude the VMP, and the search will fail.
- *
- * For relative paths, respath will aleady include a potential VMP
- * as a consequence of us prepending the CWD in resolve_light above.
- */
- if (libgf_path_absolute (path)) {
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry)
- goto free_respath;
- tmp = libgf_vmp_virtual_path (entry, path, vpath);
- if (!tmp)
- goto free_respath;
-
- respath = libgf_resolve_path_light (vpath);
- strcpy (vpath, respath);
- } else {
- entry = libgf_vmp_search_entry (respath);
- if (!entry)
- goto free_respath;
- tmp = libgf_vmp_virtual_path (entry, respath, vpath);
- if (!tmp)
- goto free_respath;
- }
-
- handle = entry->handle;
-free_respath:
- if (respath)
- free (respath); /* Alloced in libgf_resolve_path_light */
-
- return handle;
-}
-
-
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars)
-{
- glusterfs_handle_t vmphandle = NULL;
- int ret = -1;
- char *vmp_resolved = NULL;
- struct vmp_entry *vmp_entry = NULL;
- uint32_t vmphash = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ipars, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- vmphash = (dev_t)ReallySimpleHash (vmp, strlen (vmp));
- pthread_mutex_lock (&mountlock);
- {
- vmp_entry = libgf_vmp_search_exact_entry (vmp);
- if (vmp_entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Entry exists");
- ret = 0;
- goto unlock;
- }
-
- vmphandle = glusterfs_init (ipars, vmphash);
- if (!vmphandle) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "GlusterFS context"
- " init failed");
- goto unlock;
- }
-
- ret = libgf_vmp_map_ghandle (vmp_resolved, vmphandle);
- if (ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to map new"
- " handle: %s", vmp);
- glusterfs_fini (vmphandle);
- }
- }
-unlock:
- pthread_mutex_unlock (&mountlock);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-inline int
-_libgf_umount (char *vmp)
-{
- struct vmp_entry *entry= NULL;
- int ret = -1;
-
- entry = _libgf_vmp_search_entry (vmp, LIBGF_VMP_EXACT);
- if (entry == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) not mounted", vmp);
- goto out;
- }
-
- if (entry->handle == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) has no corresponding glusterfs handle",
- vmp);
- goto out;
- }
-
-/* ret = glusterfs_fini (entry->handle); */
- list_del_init (&entry->list);
- libgf_free_vmp_entry (entry);
-
- vmplist.entries--;
-
-out:
- return ret;
-}
-
-inline int
-libgf_umount (char *vmp)
-{
- int ret = -1;
-
- pthread_mutex_lock (&vmplock);
- {
- ret = _libgf_umount (vmp);
- }
- pthread_mutex_unlock (&vmplock);
-
- return ret;
-}
-
-int
-glusterfs_umount (char *vmp)
-{
- int ret = -1;
- char *vmp_resolved = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- ret = libgf_umount (vmp_resolved);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-int
-glusterfs_umount_all (void)
-{
- struct vmp_entry *entry = NULL, *tmp = NULL;
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries > 0) {
- list_for_each_entry_safe (entry, tmp, &vmplist.list,
- list) {
- /* even if there are errors, continue with other
- mounts
- */
- _libgf_umount (entry->vmp);
- }
- }
- }
- pthread_mutex_unlock (&vmplock);
-
- return 0;
-}
-
-void
-glusterfs_reset (void)
-{
- INIT_LIST_HEAD (&vmplist.list);
- vmplist.entries = 0;
-
- memset (&vmplock, 0, sizeof (vmplock));
- pthread_mutex_init (&vmplock, NULL);
-
- first_init = 1;
-}
-
-void
-glusterfs_log_lock (void)
-{
- gf_log_lock ();
-}
-
-
-void glusterfs_log_unlock (void)
-{
- gf_log_unlock ();
-}
-
-
-void
-libgf_wait_for_frames_unwind (libglusterfs_client_ctx_t *ctx)
-{
- call_pool_t *pool = NULL;
- int canreturn = 0;
-
- if (!ctx)
- return;
-
- pool = (call_pool_t *)ctx->gf_ctx.pool;
- while (1) {
- LOCK (&pool->lock);
- {
- if (pool->cnt == 0) {
- canreturn = 1;
- goto unlock_out;
- }
- }
-unlock_out:
- UNLOCK (&pool->lock);
-
- if (canreturn)
- break;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Waiting for call frames");
- sleep (1);
- }
-
- return;
-}
-
-
-int
-glusterfs_fini (glusterfs_handle_t handle)
-{
- libglusterfs_client_ctx_t *ctx = handle;
-
- libgf_wait_for_frames_unwind (ctx);
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- mem_pool_destroy (ctx->itable->inode_pool);
- mem_pool_destroy (ctx->itable->dentry_pool);
- mem_pool_destroy (ctx->itable->fd_mem_pool);
- /* iobuf_pool_destroy (ctx->gf_ctx.iobuf_pool); */
- ((gf_timer_registry_t *)ctx->gf_ctx.timer)->fin = 1;
-
- xlator_graph_fini (ctx->gf_ctx.graph);
- xlator_tree_free (ctx->gf_ctx.graph);
- ctx->gf_ctx.graph = NULL;
- pthread_cancel (ctx->reply_thread);
-
- FREE (ctx);
-
- return 0;
-}
-
-
-int32_t
-libgf_client_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- dict_t *xattr_req = NULL;
-
- if (op_ret == 0) {
- inode_t *parent = NULL;
-
- if (local->fop.lookup.loc->ino == 1) {
- buf->ia_ino = 1;
- }
-
- parent = local->fop.lookup.loc->parent;
- if (inode->ino != 1) {
- inode = inode_link (inode, parent,
- local->fop.lookup.loc->name, buf);
- }
-
- libgf_transform_iattr (ctx, inode, buf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup.loc->inode);
- local->fop.lookup.loc->inode = inode_new (ctx->itable);
- local->fop.lookup.is_revalidate = 2;
-
- if (local->fop.lookup.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- buf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
- STACK_WIND (frame, libgf_client_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- local->reply_stub = fop_lookup_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, dict, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf,
- dict_t **dict,
- dict_t *xattr_req)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- local = CALLOC (1, sizeof (*local));
- if (!local) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation"
- " failed");
- errno = ENOMEM;
- return -1;
- }
-
- if (loc->inode) {
- local->fop.lookup.is_revalidate = 1;
- loc->ino = loc->inode->ino;
- }
- else
- loc->inode = inode_new (ctx->itable);
-
- local->fop.lookup.loc = loc;
-
- LIBGF_CLIENT_FOP(ctx, stub, lookup, local, loc, xattr_req);
-
- op_ret = stub->args.lookup_cbk.op_ret;
- errno = stub->args.lookup_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.lookup_cbk.inode;
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_transform_iattr (ctx, inode, &stub->args.lookup_cbk.buf);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL,
- &stub->args.lookup_cbk.buf);
- if (stbuf)
- *stbuf = stub->args.lookup_cbk.buf;
-
- if (dict)
- *dict = dict_ref (stub->args.lookup_cbk.dict);
-
- if (inode != loc->inode) {
- inode_unref (loc->inode);
- loc->inode = inode_ref (inode);
- }
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- dict_t *dict = NULL;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
- struct iatt iatt = {0,};
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
- if (size < 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid size");
- errno = EINVAL;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (size) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "setting requested content size dictionary failed");
- goto out;
- }
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, &iatt, &dict, xattr_req);
- iatt_to_stat (&iatt, stbuf);
- if (!op_ret && stbuf && (iatt.ia_size <= size) && dict && buf) {
- data_t *mem_data = NULL;
- void *mem = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem != NULL) {
- memcpy (buf, mem, iatt.ia_size);
- }
- }
-
-out:
- if (xattr_req) {
- dict_unref (xattr_req);
- }
-
- if (dict) {
- dict_unref (dict);
- }
-
- if (pathname) {
- FREE (pathname);
- }
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf)
-{
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, stbuf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_get (h, vpath, buf, size, stbuf);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_lookup_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *stbuf,
- dict_t *dict,
- struct iatt *postparent)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- glusterfs_get_cbk_t lookup_cbk = local->fop.lookup_cbk.cbk;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- glusterfs_iobuf_t *iobuf = NULL;
- dict_t *xattr_req = NULL;
- inode_t *parent = NULL;
- struct stat stat = {0,};
-
- if (op_ret == 0) {
- parent = local->fop.lookup_cbk.loc->parent;
- inode_link (inode, parent, local->fop.lookup_cbk.loc->name,
- stbuf);
- libgf_transform_iattr (ctx, inode, stbuf);
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL, stbuf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup_cbk.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup_cbk.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup_cbk.loc->inode);
- local->fop.lookup_cbk.loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 2;
-
- if (local->fop.lookup_cbk.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup_cbk.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- stbuf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
-
- STACK_WIND (frame, libgf_client_lookup_async_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup_cbk.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- if (!op_ret && local->fop.lookup_cbk.size && dict) {
- data_t *mem_data = NULL;
- void *mem = NULL;
- struct iovec *vector = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem && stbuf->ia_size <= local->fop.lookup_cbk.size) {
- iobuf = CALLOC (1, sizeof (*iobuf));
- ERR_ABORT (iobuf);
-
- vector = CALLOC (1, sizeof (*vector));
- ERR_ABORT (vector);
- vector->iov_base = mem;
- vector->iov_len = stbuf->ia_size;
-
- iobuf->vector = vector;
- iobuf->count = 1;
- iobuf->dictref = dict_ref (dict);
- }
- }
-
- iatt_to_stat (stbuf, &stat);
- lookup_cbk (op_ret, op_errno, iobuf, &stat, local->cbk_data);
-
- libgf_client_loc_wipe (local->fop.lookup_cbk.loc);
- free (local->fop.lookup_cbk.loc);
-
- free (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-/* TODO: implement async dentry lookup */
-
-int
-glusterfs_get_async (glusterfs_handle_t handle,
- const char *path,
- size_t size,
- glusterfs_get_cbk_t cbk,
- void *cbk_data)
-{
- loc_t *loc = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- libglusterfs_client_async_local_t *local = NULL;
- int32_t op_ret = 0;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
-
- if (!ctx || !path || path[0] != '/') {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- local->fop.lookup_cbk.is_revalidate = 1;
-
- loc = CALLOC (1, sizeof (*loc));
- loc->path = strdup (path);
- op_ret = libgf_client_path_lookup (loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "path lookup failed for (%s)", path);
- goto out;
- }
-
- pathname = strdup (path);
- name = basename (pathname);
- op_ret = libgf_client_loc_fill (loc, ctx, 0, loc->parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (!loc->inode) {
- loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 0;
- }
-
- local->fop.lookup_cbk.cbk = cbk;
- local->fop.lookup_cbk.size = size;
- local->fop.lookup_cbk.loc = loc;
- local->cbk_data = cbk_data;
-
- if (size > 0) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- goto out;
- }
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libgf_client_lookup_async_cbk,
- lookup,
- loc,
- xattr_req);
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- return op_ret;
-}
-
-int32_t
-libgf_client_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_getxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_getxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, getxattr, local, loc, name);
-
- op_ret = stub->args.getxattr_cbk.op_ret;
- errno = stub->args.getxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.getxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- if ((size > 0) && (value != NULL)) {
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- op_ret = copy_len;
- }
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-#define LIBGF_DO_GETXATTR 1
-#define LIBGF_DO_LGETXATTR 2
-
-ssize_t
-__glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size,
- int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *file = NULL;
- char *pathres = NULL, *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu,"
- " op %d", path, name, (long unsigned)size, whichop);
- if (name[0] == '\0') {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: Name"
- " not NULL terminated");
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than zero");
- goto out;
- }
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- tmp = strdup (pathres);
- file = basename (tmp);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, file);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (whichop == LIBGF_DO_LGETXATTR)
- goto do_getx;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_getx;
-
- libgf_client_loc_wipe (&loc);
- op_ret = libgf_realpath_loc_fill (ctx, (char *)pathres, &loc);
- if (op_ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "realpath failed");
- goto out;
- }
-
-do_getx:
- op_ret = libgf_client_getxattr (ctx, &loc, name, value, size);
-
-out:
- if (tmp) {
- FREE (tmp);
- }
-
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_GETXATTR);
-}
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_LGETXATTR);
-}
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- if ((size > 0) && (value == NULL)) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument value");
- goto out;
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_getxattr (h, vpath, name, value, size,
- LIBGF_DO_GETXATTR);
-
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- if ((size > 0) && (value == NULL)) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument value");
- goto out;
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_getxattr (h, vpath, name, value, size,
- LIBGF_DO_LGETXATTR);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_open (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, open, local, loc, flags, fd, 0);
-
- op_ret = stub->args.open_cbk.op_ret;
- errno = stub->args.open_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "open: path %s, status: %d, errno"
- " %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-static int32_t
-libgf_client_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_create_cbk_stub (frame, NULL, op_ret, op_errno,
- fd, inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_creat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags,
- mode_t mode)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, create, local, loc, flags, mode, fd);
-
- op_ret = stub->args.create_cbk.op_ret;
- errno = stub->args.create_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Create: path %s, status: %d,"
- " errno: %d", loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.create_cbk.inode;
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.create_cbk.buf);
- libgf_transform_iattr (ctx, libgf_inode, &stub->args.create_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.create_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int32_t
-libgf_client_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_opendir_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_opendir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (((fd->flags & O_ACCMODE) == O_WRONLY)
- || ((fd->flags & O_ACCMODE) == O_RDWR)) {
- errno = EISDIR;
- goto out;
- }
- LIBGF_CLIENT_FOP (ctx, stub, opendir, local, loc, fd);
-
- op_ret = stub->args.opendir_cbk.op_ret;
- errno = stub->args.opendir_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "opendir: path %s, status %d,"
- " errno %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
-
- call_stub_destroy (stub);
-out:
- return op_ret;
-}
-
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,...)
-{
- loc_t loc = {0, };
- long op_ret = -1;
- fd_t *fd = NULL;
- int32_t ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- mode_t mode = 0;
- va_list ap;
- char *pathres = NULL;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if ((op_ret == -1) && ((flags & O_CREAT) != O_CREAT)) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- if (!op_ret && ((flags & O_CREAT) == O_CREAT)
- && ((flags & O_EXCL) == O_EXCL)) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- if (op_ret == 0) {
- flags &= ~O_CREAT;
- }
-
- if ((op_ret == -1) && ((flags & O_CREAT) == O_CREAT)) {
- libgf_client_loc_wipe (&loc);
- loc.path = strdup (pathres);
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for parent while trying to"
- " create (%s)", pathres);
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- }
-
- pathname = strdup (pathres);
- name = basename (pathname);
-
- ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- fd = fd_create (loc.inode, ctx->pid);
- fd->flags = flags;
-
- if (((flags & O_CREAT) == O_CREAT)) {
- /* If we have the st_mode for the basename, check if
- * it is a directory here itself, rather than sending
- * a network message through libgf_client_creat, and
- * then receiving a EISDIR.
- */
- if (IA_ISDIR (loc.inode->ia_type)) {
- errno = EISDIR;
- op_ret = -1;
- goto op_over;
- }
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- op_ret = libgf_client_creat (ctx, &loc, fd, flags, mode);
- } else {
- if (IA_ISDIR (loc.inode->ia_type))
- op_ret = libgf_client_opendir (ctx, &loc, fd);
- else
- op_ret = libgf_client_open (ctx, &loc, fd, flags);
- }
-
-op_over:
- if (op_ret == -1) {
- fd_unref (fd);
- fd = NULL;
- goto out;
- }
-
- vpath = NULL;
- if (IA_ISDIR (loc.inode->ia_type)) {
- vpath = (char *)path;
- }
-
- if (!libgf_get_fd_ctx (fd)) {
- if (!libgf_alloc_fd_ctx (ctx, fd, vpath)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to"
- " allocate fd context");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
- }
-
- if ((flags & O_TRUNC) && (((flags & O_ACCMODE) == O_RDWR)
- || ((flags & O_ACCMODE) == O_WRONLY))) {
- inode_ctx = libgf_get_inode_ctx (fd->inode);
- if (IA_ISREG (inode_ctx->stbuf.ia_type)) {
- inode_ctx->stbuf.ia_size = 0;
- inode_ctx->stbuf.ia_blocks = 0;
- }
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- FREE (pathname);
- }
-
- if (pathres)
- FREE (pathres);
-
- return fd;
-}
-
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...)
-{
- va_list ap;
- glusterfs_file_t fh = NULL;
- glusterfs_handle_t h = NULL;
- mode_t mode = 0;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- if (flags & O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- fh = glusterfs_glh_open (h, vpath, flags, mode);
- } else
- fh = glusterfs_glh_open (h, vpath, flags);
-out:
- return fh;
-}
-
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- return glusterfs_glh_open (handle, path,
- (O_CREAT | O_WRONLY | O_TRUNC), mode);
-}
-
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode)
-{
- glusterfs_file_t fh = NULL;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- fh = glusterfs_glh_creat (h, vpath, mode);
-
-out:
- return fh;
-}
-
-int32_t
-libgf_client_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_flush_cbk_stub (frame, NULL, op_ret, op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_flush (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- call_stub_t *stub;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, flush, local, fd);
-
- op_ret = stub->args.flush_cbk.op_ret;
- errno = stub->args.flush_cbk.op_errno;
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_close (glusterfs_file_t fd)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_flush (ctx, (fd_t *)fd);
-
- fd_unref ((fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_setxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_setxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
-
- LIBGF_CLIENT_FOP (ctx, stub, setxattr, local, loc, dict, flags);
-
- op_ret = stub->args.setxattr_cbk.op_ret;
- errno = stub->args.setxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, status %d,"
- "errno %d", loc->path, name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-#define LIBGF_DO_SETXATTR 1
-#define LIBGF_DO_LSETXATTR 2
-
-int
-__glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags, int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *tmppath = NULL;
- loc_t *realloc = NULL;
- char *pathres = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, op %d", path
- ,name, whichop);
- if (size <= 0) {
- errno = EINVAL;
- goto out;
- }
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", pathres);
- goto out;
- }
-
- tmppath = strdup (pathres);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (tmppath));
- FREE (tmppath);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- realloc = &loc;
- if (whichop == LIBGF_DO_LSETXATTR)
- goto do_setx;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_setx;
-
- libgf_client_loc_wipe (&loc);
- realloc = &loc;
- libgf_realpath_loc_fill (ctx, (char *)pathres, realloc);
-
-do_setx:
- if (!op_ret)
- op_ret = libgf_client_setxattr (ctx, realloc, name, value,
- size, flags);
-
-out:
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (realloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_SETXATTR);
-}
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_LSETXATTR);
-}
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_setxattr (h, vpath, name, value, size, flags,
- LIBGF_DO_SETXATTR);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_setxattr (h, vpath, name, value, size, flags,
- LIBGF_DO_LSETXATTR);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_fsetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_fsetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
- LIBGF_CLIENT_FOP (ctx, stub, fsetxattr, local, fd, dict, flags);
-
- op_ret = stub->args.fsetxattr_cbk.op_ret;
- errno = stub->args.fsetxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "name %s, status %d, errno %d",
- name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- int32_t op_ret = 0;
- fd_t *__fd = fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- gf_log("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid fd");
- goto out;
- }
-
- if (size <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than or equal to zero");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fsetxattr (ctx, __fd, name, value, size,
- flags);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_fgetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fgetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_fgetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fgetxattr, local, fd, name);
-
- op_ret = stub->args.fgetxattr_cbk.op_ret;
- errno = stub->args.fgetxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.fgetxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s, status %d, errno %d",
- name, op_ret, errno);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd,
- const char *name,
- void *value,
- size_t size)
-{
- int32_t op_ret = 0;
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s", name);
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0)
- goto out;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fgetxattr (ctx, __fd, name, value, size);
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd,
- const char *name)
-{
- return ENOSYS;
-}
-
-int32_t
-libgf_client_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readv_cbk_stub (frame, NULL, op_ret, op_errno,
- vector, count, stbuf, iobref);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_iobuf_read (libglusterfs_client_ctx_t *ctx, fd_t *fd, void *buf,
- size_t size, off_t offset)
-{
- call_stub_t *stub = NULL;
- struct iovec *vector = NULL;
- int32_t op_ret = -1;
- int count = 0;
- libgf_client_local_t *local = NULL;
- struct iatt *stbuf = NULL;
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- count = stub->args.readv_cbk.count;
- vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- int i = 0;
- op_ret = 0;
- while (size && (i < count)) {
- int len = (size < vector[i].iov_len) ?
- size : vector[i].iov_len;
- memcpy (buf, vector[i++].iov_base, len);
- buf += len;
- size -= len;
- op_ret += len;
- }
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_iattr (ctx, fd->inode, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "size %lu, offset %"PRIu64,
- (long unsigned)size, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-libgf_client_read (libglusterfs_client_ctx_t *ctx, fd_t *fd, void *buf,
- size_t size, off_t offset)
-{
- int32_t op_ret = -1;
- int32_t ret = 0;
- size_t tmp = 0;
-
- while (size != 0) {
- tmp = ((size > LIBGF_IOBUF_SIZE) ? LIBGF_IOBUF_SIZE :
- size);
- op_ret = libgf_client_iobuf_read (ctx, fd, buf, tmp, offset);
- if (op_ret < 0) {
- ret = op_ret;
- break;
- }
-
- ret += op_ret;
-
- if (op_ret < tmp)
- break;
-
- size -= op_ret;
- offset += op_ret;
- buf = (char *)buf + op_ret;
- }
-
- return ret;
-}
-
-ssize_t
-glusterfs_read (glusterfs_file_t fd, void *buf, size_t nbytes)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (nbytes < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (fd == 0) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, nbytes, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-libgf_client_iobuf_readv (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- const struct iovec *dst_vector, int count,
- size_t size, off_t offset, int *idx,
- off_t *vec_offset)
-{
- call_stub_t *stub = NULL;
- struct iovec *src_vector = NULL;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
- int src = 0, dst = 0;
- int src_count = 0, dst_count = 0;
- int len = 0, src_len = 0, dst_len = 0;
- off_t src_offset = 0, dst_offset = 0;
- struct iatt *stbuf = NULL;
-
- dst = *idx;
- dst_offset = *vec_offset;
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- src_count = stub->args.readv_cbk.count;
- src_vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- while ((size != 0) && (dst < dst_count) && (src < src_count)) {
- src_len = src_vector[src].iov_len - src_offset;
- dst_len = dst_vector[dst].iov_len - dst_offset;
-
- len = (src_len < dst_len) ? src_len : dst_len;
- if (len > size) {
- len = size;
- }
-
- memcpy (dst_vector[dst].iov_base + dst_offset,
- src_vector[src].iov_base + src_offset, len);
-
- size -= len;
- src_offset += len;
- dst_offset += len;
-
- if (src_offset == src_vector[src].iov_len) {
- src_offset = 0;
- src++;
- }
-
- if (dst_offset == dst_vector[dst].iov_len) {
- dst_offset = 0;
- dst++;
- }
- }
-
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_iattr (ctx, fd->inode, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_UPDATE_STAT);
- }
-
- *idx = dst;
- *vec_offset = dst_offset;
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-ssize_t
-libgf_client_readv (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- const struct iovec *dst_vector, int dst_count, off_t offset)
-{
- int32_t op_ret = -1;
- size_t size = 0, tmp = 0, ret = 0;
- int i = 0;
- int dst_idx = 0;
- off_t dst_offset = 0;
-
- for (i = 0; i < dst_count; i++)
- {
- size += dst_vector[i].iov_len;
- }
-
- while (size != 0) {
- tmp = ((size > LIBGF_IOBUF_SIZE) ? LIBGF_IOBUF_SIZE : size);
- op_ret = libgf_client_iobuf_readv (ctx, fd, dst_vector,
- dst_count, tmp, offset,
- &dst_idx, &dst_offset);
- if (op_ret <= 0) {
- break;
- }
-
- offset += op_ret;
- size -= op_ret;
- ret += op_ret;
- }
-
- return ret;
-}
-
-
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readv (ctx, (fd_t *)fd, vec, count, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pread (glusterfs_file_t fd,
- void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, count, offset);
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_client_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_writev_cbk_stub (frame, NULL, op_ret, op_errno,
- prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_iobuf_write (libglusterfs_client_ctx_t *ctx, fd_t *fd, char *addr,
- size_t size, off_t offset)
-{
- struct iobref *ioref = NULL;
- struct iobuf *iob = NULL;
- int op_ret = -1;
- struct iovec iov = {0, };
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, addr, out);
-
- ioref = iobref_new ();
- if (!ioref) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- iob = iobuf_get (ctx->gf_ctx.iobuf_pool);
- if (!iob) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- memcpy (iob->ptr, addr, size);
- iobref_add (ioref, iob);
-
- iov.iov_base = iob->ptr;
- iov.iov_len = size;
-
- LIBGF_CLIENT_FOP (ctx, stub, writev, local, fd, &iov,
- 1, offset, ioref);
-
- op_ret = stub->args.writev_cbk.op_ret;
- errno = stub->args.writev_cbk.op_errno;
-
- /* We need to invalidate because it is possible that write-behind
- * is a translator below us and returns a stat filled with zeroes.
- */
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
-
-out:
- if (iob) {
- iobuf_unref (iob);
- }
-
- if (ioref) {
- iobref_unref (ioref);
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-libgf_client_writev (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct iovec *vector,
- int count,
- off_t offset)
-{
- int op_ret = 0;
- int written = 0;
- int writesize = 0;
- int size = 0;
- char *base = NULL;
- int i = 0;
-
- for (i = 0; i < count; i++) {
- size = vector[i].iov_len;
- base = vector[i].iov_base;
-
- while (size > 0) {
- writesize = (size > LIBGF_IOBUF_SIZE) ?
- LIBGF_IOBUF_SIZE : size;
-
- written = libgf_client_iobuf_write (ctx, fd, base,
- writesize, offset);
-
- if (written == -1)
- goto out;
-
- op_ret += written;
- base += written;
- size -= written;
- offset += written;
- }
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_write (glusterfs_file_t fd,
- const void *buf,
- size_t n)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (n < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (n == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- vector.iov_base = (void *)buf;
- vector.iov_len = n;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_writev (glusterfs_file_t fd,
- const struct iovec *vector,
- int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- (struct iovec *)vector,
- count,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd,
- const void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = count;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
-out:
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- libgf_client_local_t *local = frame->local;
-
- /* Note, we dont let entries reach the stub because there it gets copied
- * while we can simply delink the entries here and link them into our
- * dcache, thereby avoiding the need to perform more allocations and
- * copies.
- */
- local->reply_stub = fop_readdirp_cbk_stub (frame, NULL, op_ret,
- op_errno, NULL);
- if (op_ret > 0)
- libgf_dcache_update (frame->root->state, local->fd, entries);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (libgf_dcache_readdir (ctx, fd, dirp, offset))
- return 1;
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readdirp, local, fd,
- LIBGF_READDIR_BLOCK, *offset);
-
- errno = stub->args.readdir_cbk.op_errno;
-
- op_ret = libgf_dcache_readdir (ctx, fd, dirp, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, entry, out);
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp,
- &offset);
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed:"
- " %s", strerror (errno));
- if (result && (op_ret == 0)) {
- *result = NULL;
- } else if (op_ret < 0){
- op_ret = errno;
- }
- goto unlock;
- }
-
- fd_ctx->offset = offset;
-
- if (result) {
- *result = memcpy (entry, dirp, sizeof (*entry));
- } else {
- memcpy (entry, dirp, sizeof (*entry));
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", "
- " entry %s", offset, entry->d_name);
- op_ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return op_ret;
-}
-
-
-void *
-glusterfs_readdir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp, &offset);
-
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed: %s",
- strerror (errno));
- dirp = NULL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", entry %s",
- offset, dirp->d_name);
-out:
- return dirp;
-}
-
-
-int
-glusterfs_getdents (glusterfs_file_t fd, struct dirent *dirp,
- unsigned int count)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, &offset);
-
- if (op_ret > 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libglusterfs_readv_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- glusterfs_iobuf_t *buf;
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *__fd = local->fop.readv_cbk.fd;
- glusterfs_readv_cbk_t readv_cbk = local->fop.readv_cbk.cbk;
-
- buf = CALLOC (1, sizeof (*buf));
- ERR_ABORT (buf);
-
- if (vector) {
- buf->vector = iov_dup (vector, count);
- }
-
- buf->count = count;
-
- if (iobref) {
- buf->iobref = iobref_ref (iobref);
- }
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (__fd);
-
- /* update offset only if we have used offset stored in fd_ctx */
- if (local->fop.readv_cbk.update_offset) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
- }
-
- readv_cbk (op_ret, op_errno, buf, local->cbk_data);
-
- FREE (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-void
-glusterfs_free (glusterfs_iobuf_t *buf)
-{
- //iov_free (buf->vector, buf->count);
- FREE (buf->vector);
- if (buf->iobref)
- iobref_unref ((struct iobref *) buf->iobref);
- if (buf->dictref)
- dict_unref ((dict_t *) buf->dictref);
- FREE (buf);
-}
-
-int
-glusterfs_read_async (glusterfs_file_t fd,
- size_t nbytes,
- off_t offset,
- glusterfs_readv_cbk_t readv_cbk,
- void *cbk_data)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
-
- if (nbytes < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.readv_cbk.fd = __fd;
- local->fop.readv_cbk.cbk = readv_cbk;
- local->cbk_data = cbk_data;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- local->fop.readv_cbk.update_offset = 1;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_readv_async_cbk,
- readv,
- __fd,
- nbytes,
- offset);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libglusterfs_writev_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *fd = NULL;
- glusterfs_write_cbk_t write_cbk;
-
- write_cbk = local->fop.write_cbk.cbk;
- fd = local->fop.write_cbk.fd;
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- write_cbk (op_ret, op_errno, local->cbk_data);
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-int32_t
-glusterfs_write_async (glusterfs_file_t fd,
- const void *buf,
- size_t nbytes,
- off_t offset,
- glusterfs_write_cbk_t write_cbk,
- void *cbk_data)
-{
- fd_t *__fd = (fd_t *)fd;
- struct iovec vector;
- off_t __offset = offset;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
- struct iobref *iobref = NULL;
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (nbytes < 0) {
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.write_cbk.fd = __fd;
- local->fop.write_cbk.cbk = write_cbk;
- local->cbk_data = cbk_data;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = nbytes;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- iobref = iobref_new ();
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_writev_async_cbk,
- writev,
- __fd,
- &vector,
- 1,
- __offset,
- iobref);
- iobref_unref (iobref);
-
-out:
- return op_ret;
-}
-
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence)
-{
- off_t __offset = 0;
- int32_t op_ret = -1;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- __offset = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (whence)
- {
- case SEEK_SET:
- __offset = offset;
- break;
-
- case SEEK_CUR:
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- __offset += offset;
- break;
-
- case SEEK_END:
- {
- char cache_valid = 0;
- off_t end = 0;
- loc_t loc = {0, };
- struct iatt stbuf = {0, };
-
- cache_valid = libgf_is_iattr_cache_valid (ctx, __fd->inode,
- &stbuf,
- LIBGF_VALIDATE_STAT);
- if (cache_valid) {
- end = stbuf.ia_size;
- } else {
- op_ret = libgf_client_loc_fill (&loc, ctx,
- __fd->inode->ino, 0,
- NULL);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- libgf_client_loc_wipe (&loc);
- __offset = -1;
- goto out;
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, &stbuf, NULL,
- NULL);
- if (op_ret < 0) {
- __offset = -1;
- libgf_client_loc_wipe (&loc);
- goto out;
- }
-
- end = stbuf.ia_size;
- }
-
- __offset = end + offset;
- libgf_client_loc_wipe (&loc);
- }
- break;
-
- default:
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid value for whence");
- __offset = -1;
- errno = EINVAL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = __offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return __offset;
-}
-
-
-int32_t
-libgf_client_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_stat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_stat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct iatt cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, loc->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- if (stbuf)
- memcpy (stbuf, &cachedbuf, sizeof (struct stat));
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, stat, local, loc);
-
- op_ret = stub->args.stat_cbk.op_ret;
- errno = stub->args.stat_cbk.op_errno;
- libgf_transform_iattr (ctx, loc->inode, &stub->args.stat_cbk.buf);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
-
- if (op_ret == 0) {
- if (stbuf)
- *stbuf = stub->args.stat_cbk.buf;
-
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.stat_cbk.buf);
- }
-
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc)
-{
- int op_ret = -1;
- char *target = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, link, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, targetloc, out);
-
- targetloc->path = glusterfs_glh_realpath (ctx, link, NULL);
-
- if (targetloc->path == NULL)
- goto out;
-
- op_ret = libgf_client_path_lookup (targetloc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- target = strdup (targetloc->path);
- op_ret = libgf_client_loc_fill (targetloc, ctx, 0,
- targetloc->parent->ino,
- basename (target));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
-out:
- if (target)
- FREE (target);
-
- return op_ret;
-}
-
-#define LIBGF_DO_LSTAT 0x01
-#define LIBGF_DO_STAT 0x02
-
-int
-__glusterfs_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf, int whichstat)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- loc_t targetloc = {0, };
- loc_t *real_loc = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op: %d", path,
- whichstat);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- real_loc = &loc;
- /* The stat fop in glusterfs calls lstat. So we have to
- * provide the POSIX compatible stat fop. To do so, we need to ensure
- * that if the @path is a symlink, we must perform a stat on the
- * target of that symlink than the symlink itself(..because if
- * do a stat on the symlink, we're actually doing what lstat
- * should do. See posix_stat
- */
- if (whichstat & LIBGF_DO_LSTAT)
- goto lstat_fop;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto lstat_fop;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
- real_loc = &targetloc;
-
-lstat_fop:
-
- if (!op_ret) {
- struct iatt iatt;
- op_ret = libgf_client_stat (ctx, real_loc, &iatt);
- iatt_to_stat (&iatt, buf);
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
-
- return op_ret;
-}
-
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_STAT);
-}
-
-int
-glusterfs_stat (const char *path, struct stat *buf)
-{
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_stat (h, vpath, buf);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path, struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_LSTAT);
-}
-
-int
-glusterfs_lstat (const char *path, struct stat *buf)
-{
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_lstat (h, vpath, buf);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fstat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-
-}
-
-int32_t
-libgf_client_fstat (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct stat *buf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct iatt cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, fd->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- if (buf)
- memcpy (buf, &cachedbuf, sizeof (struct stat));
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, fstat, local, fd);
-
- op_ret = stub->args.fstat_cbk.op_ret;
- errno = stub->args.fstat_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
-
- if (op_ret == 0) {
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.fstat_cbk.buf);
- if (buf)
- iatt_to_stat (&stub->args.fstat_cbk.buf, buf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fstat_cbk.buf);
- }
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int32_t
-glusterfs_fstat (glusterfs_file_t fd, struct stat *buf)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = -1;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_fstat (ctx, __fd, buf);
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libgf_client_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mkdir_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-static int32_t
-libgf_client_mkdir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- mode_t mode)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mkdir, local, loc, mode);
- op_ret = stub->args.mkdir_cbk.op_ret;
- errno = stub->args.mkdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.mkdir_cbk.inode;
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.mkdir_cbk.buf);
- libgf_transform_iattr (ctx, libgf_inode, &stub->args.mkdir_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.mkdir_cbk.buf);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-
-int32_t
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mkdir (ctx, &loc, mode);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_mkdir (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_mkdir (h, vpath, mode);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rmdir_cbk_stub (frame, NULL, op_ret, op_errno,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_rmdir (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rmdir, local, loc);
-
- op_ret = stub->args.rmdir_cbk.op_ret;
- errno = stub->args.rmdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (stub->args.rmdir_cbk.op_ret != 0)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int32_t
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_rmdir (ctx, &loc);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_rmdir (const char *path)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_rmdir (h, vpath);
-out:
- return op_ret;
-}
-
-int
-libgf_client_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_setattr_cbk_stub (frame, NULL,
- op_ret, op_errno,
- preop, postop);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_setattr (libglusterfs_client_ctx_t *ctx, loc_t * loc,
- struct iatt *stbuf, int32_t valid)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, setattr, local, loc,
- stbuf, valid);
-
- op_ret = stub->args.setattr_cbk.op_ret;
- errno = stub->args.setattr_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, loc->inode,
- &stub->args.setattr_cbk.statpost);
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.setattr_cbk.statpost);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- stbuf.ia_prot = ia_prot_from_st_mode (mode);
- valid |= GF_SET_ATTR_MODE;
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_chmod (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_chmod (h, vpath, mode);
-out:
- return op_ret;
-}
-
-
-#define LIBGF_DO_CHOWN 1
-#define LIBGF_DO_LCHOWN 2
-
-int
-__glusterfs_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group, int whichop)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- loc_t *oploc = NULL;
- loc_t targetloc = {0, };
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op %d", path, whichop);
- stbuf.ia_uid = owner;
- stbuf.ia_gid = group;
- valid |= (GF_SET_ATTR_UID | GF_SET_ATTR_GID);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename ((char *)name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- oploc = &loc;
- if (whichop == LIBGF_DO_LCHOWN)
- goto do_lchown;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_lchown;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
-
- oploc = &targetloc;
-do_lchown:
- op_ret = libgf_client_setattr (ctx, oploc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_CHOWN);
-}
-
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_chown (h, vpath, owner, group);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_LCHOWN);
-}
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_lchown (h, vpath, owner, group);
-out:
- return op_ret;
-}
-
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- fd_t *dirfd = NULL;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (!IA_ISDIR (loc.inode->ia_type) && !IA_ISLNK (loc.inode->ia_type)) {
- errno = ENOTDIR;
- op_ret = -1;
- goto out;
- }
-
- dirfd = fd_create (loc.inode, ctx->pid);
- op_ret = libgf_client_opendir (ctx, &loc, dirfd);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- goto out;
- }
-
- if (!libgf_get_fd_ctx (dirfd)) {
- if (!(libgf_alloc_fd_ctx (ctx, dirfd, (char *)path))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Context "
- "allocation failed");
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
- }
-
-out:
- if (name)
- FREE (name);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- }
-
- libgf_client_loc_wipe (&loc);
- return dirfd;
-}
-
-glusterfs_dir_t
-glusterfs_opendir (const char *path)
-{
- char vpath[PATH_MAX];
- glusterfs_dir_t dir = NULL;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- dir = glusterfs_glh_opendir (h, vpath);
-out:
- return dir;
-}
-
-int
-glusterfs_closedir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, dirfd, out);
- fdctx = libgf_get_fd_ctx (dirfd);
-
- if (fdctx == NULL) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- op_ret = libgf_client_flush (fdctx->ctx, (fd_t *)dirfd);
- fd_unref ((fd_t *)dirfd);
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_client_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsetattr_cbk_stub (frame, NULL,
- op_ret, op_errno,
- preop, postop);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_fsetattr (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fsetattr, local, fd, stbuf, valid);
-
- op_ret = stub->args.fsetattr_cbk.op_ret;
- errno = stub->args.fsetattr_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.fsetattr_cbk.statpost);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fsetattr_cbk.statpost);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- fdctx = libgf_get_fd_ctx (fd);
-
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- stbuf.ia_prot = ia_prot_from_st_mode (mode);
- valid |= GF_SET_ATTR_MODE;
-
- op_ret = libgf_client_fsetattr (fdctx->ctx, fd, &stbuf, valid);
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t uid, gid_t gid)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
- stbuf.ia_uid = uid;
- stbuf.ia_gid = gid;
-
- valid |= (GF_SET_ATTR_UID | GF_SET_ATTR_GID);
-
- op_ret = libgf_client_fsetattr (fdctx->ctx, fd, &stbuf, valid);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsync_cbk_stub (frame, NULL, op_ret, op_errno,
- prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_fsync (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
-
- LIBGF_CLIENT_FOP (ctx, stub, fsync, local, fd, 0);
-
- op_ret = stub->args.fsync_cbk.op_ret;
- errno = stub->args.fsync_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsync (glusterfs_file_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx ((fd_t *)fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_fsync (fdctx->ctx, (fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator
- ,int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_ftruncate_cbk_stub (frame, NULL, op_ret,
- op_errno, prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_ftruncate (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- off_t length)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- if (!(((fd->flags & O_ACCMODE) == O_RDWR)
- || ((fd->flags & O_ACCMODE) == O_WRONLY))) {
- errno = EBADF;
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, ftruncate, local, fd, length);
-
- op_ret = stub->args.ftruncate_cbk.op_ret;
- errno = stub->args.ftruncate_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.ftruncate_cbk.postbuf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.ftruncate_cbk.postbuf);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&fdctx->lock);
- {
- fdctx->offset = stub->args.ftruncate_cbk.postbuf.ia_size;
- }
- pthread_mutex_unlock (&fdctx->lock);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_ftruncate (fdctx->ctx, fd, length);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_link_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_link (libglusterfs_client_ctx_t *ctx, loc_t *old, loc_t *new)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int op_ret = -1;
- inode_t *inode = NULL;
- struct iatt *sbuf = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, link, local, old, new);
-
- op_ret = stub->args.link_cbk.op_ret;
- errno = stub->args.link_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d,"
- " errno %d", old->path, new->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.link_cbk.inode;
- sbuf = &stub->args.link_cbk.buf;
- inode_link (inode, new->parent, basename ((char *)new->path), sbuf);
- libgf_transform_iattr (ctx, inode, sbuf);
- inode_lookup (inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT, sbuf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- int op_ret = -1;
- loc_t old = {0,};
- loc_t new = {0,};
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- old.path = strdup (oldpath);
- if (!old.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&old, ctx, 1);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- oldname = strdup (old.path);
- op_ret = libgf_client_loc_fill (&old, ctx, 0, old.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (IA_ISDIR (old.inode->ia_type)) {
- errno = EPERM;
- op_ret = -1;
- goto out;
- }
-
- new.path = strdup (newpath);
- if (!new.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&new, ctx, 1);
- if (op_ret == 0) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- newname = strdup (new.path);
- new.inode = inode_ref (old.inode);
- libgf_client_loc_fill (&new, ctx, 0, new.parent->ino,
- basename (newname));
- op_ret = libgf_client_link (ctx, &old, &new);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&old);
- libgf_client_loc_wipe (&new);
-
- return op_ret;
-}
-
-int
-glusterfs_link (const char *oldpath, const char *newpath)
-{
- int op_ret = -1;
- char oldvpath[PATH_MAX];
- char newvpath[PATH_MAX];
- glusterfs_handle_t oldh = NULL;
- glusterfs_handle_t newh = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- oldh = libgf_resolved_path_handle (oldpath, oldvpath);
- if (!oldh) {
- errno = ENODEV;
- goto out;
- }
-
- newh = libgf_resolved_path_handle (newpath, newvpath);
- if (!newh) {
- errno = ENODEV;
- goto out;
- }
-
- /* Cannot hard link across glusterfs mounts. */
- if (newh != oldh) {
- errno = EXDEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_link (newh, oldvpath, newvpath);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_statfs_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_statvfs (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- struct statvfs *buf)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int32_t op_ret = -1;
-
- /* statfs fop receives struct statvfs as an argument */
-
- /* libgf_client_statfs_cbk will be the callback, not
- libgf_client_statvfs_cbk. see definition of LIBGF_CLIENT_FOP
- */
- LIBGF_CLIENT_FOP (ctx, stub, statfs, local, loc);
-
- op_ret = stub->args.statfs_cbk.op_ret;
- errno = stub->args.statfs_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (buf)
- memcpy (buf, &stub->args.statfs_cbk.buf, sizeof (*buf));
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf)
-{
- struct statvfs stvfs = {0, };
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, &stvfs);
- if (op_ret == 0) {
-#ifdef GF_SOLARIS_HOST_OS
- buf->f_fstyp = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
-#else
- buf->f_type = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_bavail = stvfs.f_bavail;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
- /* FIXME: buf->f_fsid has either "val" or "__val" as member
- based on conditional macro expansion. see definition of
- fsid_t - Raghu
- It seems have different structure member names on
- different archs, so I am stepping down to doing a struct
- to struct copy. :Shehjar
- */
- memcpy (&buf->f_fsid, &stvfs.f_fsid, sizeof (stvfs.f_fsid));
- buf->f_namelen = stvfs.f_namemax;
-#endif
- }
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statfs (const char *path, struct statfs *buf)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_statfs (h, vpath, buf);
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning"
- " EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, buf);
- if (op_ret != -1)
- /* Should've been a call to libgf_transform_iattr but
- * that only handles struct stat
- */
- buf->f_fsid = (unsigned long)ctx->fake_fsid;
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf)
-{
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_statvfs (h, vpath, buf);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rename_cbk_stub (frame, NULL, op_ret, op_errno,
- buf, preoldparent,
- postoldparent, prenewparent,
- postnewparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_rename (libglusterfs_client_ctx_t *ctx, loc_t *oldloc,
- loc_t *newloc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rename, local, oldloc, newloc);
-
- op_ret = stub->args.rename_cbk.op_ret;
- errno = stub->args.rename_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d, errno"
- " %d", oldloc->path, newloc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (!libgf_get_inode_ctx (newloc->inode))
- libgf_alloc_inode_ctx (ctx, newloc->inode);
-
- libgf_transform_iattr (ctx, newloc->inode, &stub->args.rename_cbk.buf);
- libgf_update_iattr_cache (newloc->inode, LIBGF_UPDATE_STAT,
- &stub->args.rename_cbk.buf);
-
- inode_unlink (oldloc->inode, oldloc->parent, oldloc->name);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- loc_t oldloc = {0, }, newloc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *newname = NULL;
- char *oldname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- oldloc.path = strdup (oldpath);
- if (!oldloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&oldloc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", oldloc.path);
- goto out;
- }
-
- newloc.path = strdup (newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
-
- oldname = strdup (oldloc.path);
- op_ret = libgf_client_loc_fill (&oldloc, ctx, 0, oldloc.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- op_ret = libgf_client_rename (ctx, &oldloc, &newloc);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&newloc);
- libgf_client_loc_wipe (&oldloc);
-
- return op_ret;
-}
-
-
-int
-glusterfs_rename (const char *oldpath, const char *newpath)
-{
- int op_ret = -1;
- char oldvpath[PATH_MAX];
- char newvpath[PATH_MAX];
- glusterfs_handle_t oldh = NULL;
- glusterfs_handle_t newh = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Old %s, new %s", oldpath,
- newpath);
-
- oldh = libgf_resolved_path_handle (oldpath, oldvpath);
- if (!oldh) {
- errno = ENODEV;
- goto out;
- }
-
- newh = libgf_resolved_path_handle (newpath, newvpath);
- if (!newh) {
- errno = ENODEV;
- goto out;
- }
-
- if (oldh != newh) {
- errno = EXDEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_rename (oldh, oldvpath, newvpath);
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2])
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- stbuf.ia_atime = times[0].tv_sec;
- stbuf.ia_atime_nsec = times[0].tv_usec * 1000;
- stbuf.ia_mtime = times[1].tv_sec;
- stbuf.ia_mtime_nsec = times[1].tv_usec * 1000;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utimes (const char *path, const struct timeval times[2])
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_utimes (h, vpath, times);
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- if (buf) {
- stbuf.ia_atime = buf->actime;
- stbuf.ia_atime_nsec = 0;
-
- stbuf.ia_mtime = buf->modtime;
- stbuf.ia_mtime_nsec = 0;
- }
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_utime (h, vpath, buf);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mknod_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_mknod (libglusterfs_client_ctx_t *ctx, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mknod, local, loc, mode, rdev);
-
- op_ret = stub->args.mknod_cbk.op_ret;
- errno = stub->args.mknod_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- inode = stub->args.mknod_cbk.inode;
- inode_link (inode, loc->parent, loc->name, &stub->args.mknod_cbk.buf);
- libgf_transform_iattr (ctx, inode, &stub->args.mknod_cbk.buf);
- inode_lookup (inode);
-
- if (!libgf_alloc_inode_ctx (ctx, inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.mknod_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *path, mode_t mode,
- dev_t dev)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- FREE (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
-
- h = libgf_resolved_path_handle (pathname, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
-
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
- dev_t dev = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to resolve name");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode | S_IFIFO, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- free (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mkfifo (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_mkfifo (h, vpath, mode);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_unlink_cbk_stub (frame, NULL, op_ret, op_errno,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_unlink (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, unlink, local, loc);
-
- op_ret = stub->args.unlink_cbk.op_ret;
- errno = stub->args.unlink_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", loc->path);
- if (op_ret == -1)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_unlink (const char *path)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_unlink (h, vpath);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_symlink_cbk_stub (frame, NULL, op_ret,
- op_errno, inode, buf,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_symlink (libglusterfs_client_ctx_t *ctx, const char *linkpath,
- loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, symlink, local, linkpath, loc);
-
- op_ret = stub->args.symlink_cbk.op_ret;
- errno = stub->args.symlink_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s, status %d"
- " errno %d", linkpath, loc->path, op_ret, errno);
- inode = stub->args.symlink_cbk.inode;
- inode_link (inode, loc->parent, loc->name,
- &stub->args.symlink_cbk.buf);
- libgf_transform_iattr (ctx, inode, &stub->args.symlink_cbk.buf);
- inode_lookup (inode);
- if (!libgf_get_inode_ctx (inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.symlink_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t oldloc = {0, };
- loc_t newloc = {0, };
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
- /* Old path does not need to be interpreted or looked up */
- oldloc.path = strdup (oldpath);
-
- newloc.path = strdup (newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
- if (op_ret == 0) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "new path (%s) already exists, "
- " returning EEXIST", newloc.path);
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- newloc.inode = inode_new (ctx->itable);
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
-
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_symlink (ctx, oldpath, &newloc);
-
-out:
- if (newname)
- FREE (newname);
-
- if (oldname)
- FREE (oldname);
- libgf_client_loc_wipe (&oldloc);
- libgf_client_loc_wipe (&newloc);
- return op_ret;
-}
-
-int
-glusterfs_symlink (const char *oldpath, const char *newpath)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
-
- h = libgf_resolved_path_handle (newpath, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_symlink (h, oldpath, vpath);
-out:
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readlink_cbk_stub (frame, NULL, op_ret,
- op_errno, path, sbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc, char *buf,
- size_t bufsize)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- size_t cpy_size = 0;
-
- LIBGF_CLIENT_FOP (ctx, stub, readlink, local, loc, bufsize);
-
- op_ret = stub->args.readlink_cbk.op_ret;
- errno = stub->args.readlink_cbk.op_errno;
-
- if (op_ret != -1) {
- cpy_size = ((op_ret <= bufsize) ? op_ret : bufsize);
- memcpy (buf, stub->args.readlink_cbk.buf, cpy_size);
- op_ret = cpy_size;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, target: %s,"
- " status %d, errno %d", loc->path, buf, op_ret, errno);
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, status %d, "
- "errno %d", loc->path, op_ret, errno);
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- if (bufsize < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (bufsize == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_readlink (ctx, &loc, buf, bufsize);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_readlink (h, vpath, buf, bufsize);
-out:
- return op_ret;
-}
-
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path)
-{
- char *buf = NULL;
- char *rpath = NULL;
- char *start = NULL, *end = NULL;
- char *dest = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- long int path_max = 0;
- char *ptr = NULL;
- struct stat stbuf = {0, };
- long int new_size = 0;
- char *new_rpath = NULL;
- int dest_offset = 0;
- char *rpath_limit = 0;
- int ret = 0, num_links = 0;
- char *vpath = NULL, *tmppath = NULL;
- char absolute_path[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
-#ifdef PATH_MAX
- path_max = PATH_MAX;
-#else
- path_max = pathconf (path, _PC_PATH_MAX);
- if (path_max <= 0) {
- path_max = 1024;
- }
-#endif
-
- if (resolved_path == NULL) {
- rpath = CALLOC (1, path_max);
- if (rpath == NULL) {
- errno = ENOMEM;
- goto out;
- }
- } else {
- rpath = resolved_path;
- }
-
- rpath_limit = rpath + path_max;
-
- if (path[0] == '/') {
- rpath[0] = '/';
- dest = rpath + 1;
- } else {
- /*
- FIXME: can $CWD be a valid path on glusterfs server? hence is
- it better to handle this case or just return EINVAL for
- relative paths?
- */
- ptr = getcwd (rpath, path_max);
- if (ptr == NULL) {
- goto err;
- }
- dest = rpath + strlen (rpath);
- }
-
- for (start = end = (char *)path; *end; start = end) {
- if (dest[-1] != '/') {
- *dest++ = '/';
- }
-
- while (*start == '/') {
- start++;
- }
-
- for (end = start; *end && *end != '/'; end++);
-
- if ((end - start) == 0) {
- break;
- }
-
- if ((end - start == 1) && (start[0] == '.')) {
- /* do nothing */
- } else if (((end - start) == 2) && (start[0] == '.')
- && (start[1] == '.')) {
- if (dest > rpath + 1) {
- while (--dest[-1] != '/');
- }
- } else {
- if ((dest + (end - start + 1)) >= rpath_limit) {
- if (resolved_path == NULL) {
- errno = ENAMETOOLONG;
- if (dest > rpath + 1)
- dest--;
- *dest = '\0';
- goto err;
- }
-
- dest_offset = dest - rpath;
- new_size = rpath_limit - rpath;
- if ((end - start + 1) > path_max) {
- new_size = (end - start + 1);
- } else {
- new_size = path_max;
- }
-
- new_rpath = realloc (rpath, new_size);
- if (new_rpath == NULL) {
- goto err;
- }
-
-
- dest = new_rpath + dest_offset;
- rpath = new_rpath;
- rpath_limit = rpath + new_size;
- }
-
- memcpy (dest, start, end - start);
- dest += end - start;
- *dest = '\0';
-
- ret = glusterfs_glh_lstat (handle, rpath, &stbuf);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "glusterfs_glh_stat returned -1 for"
- " path (%s):(%s)", rpath,
- strerror (errno));
- goto err;
- }
-
- if (S_ISLNK (stbuf.st_mode)) {
- buf = calloc (1, path_max);
- if (buf == NULL) {
- errno = ENOMEM;
- goto err;
- }
-
- if (++num_links > MAXSYMLINKS)
- {
- errno = ELOOP;
- FREE (buf);
- goto err;
- }
-
- ret = glusterfs_glh_readlink (handle, rpath,
- buf,
- path_max - 1);
- if (ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "glusterfs_readlink returned %d"
- " for path (%s):(%s)",
- ret, rpath, strerror (errno));
- FREE (buf);
- goto err;
- }
- buf[ret] = '\0';
-
- if (buf[0] != '/') {
- tmppath = strdup (rpath);
- tmppath = dirname (tmppath);
- sprintf (absolute_path, "%s/%s",
- tmppath, buf);
- FREE (buf);
- buf = libgf_resolve_path_light ((char *)absolute_path);
- FREE (tmppath);
- }
-
- rpath = glusterfs_glh_realpath (handle, buf,
- rpath);
- FREE (buf);
- if (rpath == NULL) {
- goto out;
- }
- dest = rpath + strlen (rpath);
-
- } else if (!S_ISDIR (stbuf.st_mode) && *end != '\0') {
- errno = ENOTDIR;
- goto err;
- }
- }
- }
- if (dest > rpath + 1 && dest[-1] == '/')
- --dest;
- *dest = '\0';
-
-out:
- if (vpath)
- FREE (vpath);
- return rpath;
-
-err:
- if (vpath)
- FREE (vpath);
- if (resolved_path == NULL) {
- FREE (rpath);
- }
-
- return NULL;
-}
-
-char *
-glusterfs_realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
- char *realp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- realp = CALLOC (PATH_MAX, sizeof (char));
- if (!realp)
- goto out;
-
- libgf_vmp_search_vmp (h, realp, PATH_MAX);
- res = glusterfs_glh_realpath (h, vpath, resolved_path);
- if (!res)
- goto out;
-
- /* This copy is needed to ensure that when we return the real resolved
- * path, we return a path that accounts for the app's view of the
- * path, i.e. it starts with the VMP, in case this is an absolute path.
- */
- if (libgf_path_absolute (path)) {
- strcat (realp, resolved_path);
- strcpy (resolved_path, realp);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, resolved %s", path,
- resolved_path);
-out:
- if (realp)
- FREE (realp);
-
- return res;
-}
-
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path)
-{
- loc_t loc = {0, };
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, handle, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1)
- goto out;
-
- if (IA_ISDIR (loc.inode->ia_type))
- op_ret = libgf_client_rmdir (ctx, &loc);
- else
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- return op_ret;
-
-}
-
-int
-glusterfs_remove(const char *pathname)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
-
- h = libgf_resolved_path_handle (pathname, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_remove (h, vpath);
-out:
- return op_ret;
-}
-
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- off_t off = -1;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- off = fd_ctx->offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return off;
-}
-
-struct libgf_client_sendfile_data {
- int reads_sent;
- int reads_completed;
- int out_fd;
- int32_t op_ret;
- int32_t op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-};
-
-int
-libgf_client_sendfile_read_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- struct libgf_client_sendfile_data *sendfile_data = cbk_data;
- int bytes = 0;
-
- if (op_ret > 0) {
- bytes = writev (sendfile_data->out_fd, buf->vector, buf->count);
- if (bytes != op_ret) {
- op_ret = -1;
- op_errno = errno;
- }
-
- glusterfs_free (buf);
- }
-
- pthread_mutex_lock (&sendfile_data->lock);
- {
- if (sendfile_data->op_ret != -1) {
- if (op_ret == -1) {
- sendfile_data->op_ret = -1;
- sendfile_data->op_errno = op_errno;
- } else {
- sendfile_data->op_ret += op_ret;
- }
- }
-
- sendfile_data->reads_completed++;
-
- if (sendfile_data->reads_completed
- == sendfile_data->reads_sent) {
- pthread_cond_broadcast (&sendfile_data->cond);
- }
- }
- pthread_mutex_unlock (&sendfile_data->lock);
-
- return 0;
-}
-
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count)
-{
- ssize_t ret = -1;
- struct libgf_client_sendfile_data cbk_data = {0, };
- off_t off = -1;
- size_t size = 0;
- int flags = 0;
- int non_block = 0;
-
-
- pthread_mutex_init (&cbk_data.lock, NULL);
- pthread_cond_init (&cbk_data.cond, NULL);
- cbk_data.out_fd = out_fd;
-
- if (offset) {
- off = *offset;
- }
-
- flags = fcntl (out_fd, F_GETFL);
-
- if (flags != -1) {
- non_block = flags & O_NONBLOCK;
-
- if (non_block) {
- ret = fcntl (out_fd, F_SETFL, flags & ~O_NONBLOCK);
- }
- }
-
- while (count != 0) {
- /*
- * FIXME: what's the optimal size for reads and writes?
- */
- size = (count > LIBGF_SENDFILE_BLOCK_SIZE) ?
- LIBGF_SENDFILE_BLOCK_SIZE : count;
-
- /*
- * we don't wait for reply to previous read, we just send all
- * reads in a single go.
- */
- ret = glusterfs_read_async (in_fd, size, off,
- libgf_client_sendfile_read_cbk,
- &cbk_data);
- if (ret == -1) {
- break;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- cbk_data.reads_sent++;
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset) {
- off += size;
- }
-
- count -= size;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- /*
- * if we've not received replies to all the reads we've sent,
- * wait for them
- */
- if (cbk_data.reads_sent > cbk_data.reads_completed) {
- pthread_cond_wait (&cbk_data.cond,
- &cbk_data.lock);
- }
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset != NULL) {
- *offset = off;
- }
-
- /* if we were able to stack_wind all the reads */
-
- if (ret == 0) {
- ret = cbk_data.op_ret;
- errno = cbk_data.op_errno;
- }
-
- if (non_block) {
- fcntl (out_fd, F_SETFL, flags);
- }
-
- return ret;
-}
-
-
-static int32_t
-libgf_client_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_lk_cbk_stub (frame, NULL, op_ret, op_errno,
- lock);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-
-int
-libgf_client_lk (libglusterfs_client_ctx_t *ctx, fd_t *fd, int cmd,
- struct flock *lock)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP(ctx, stub, lk, local, fd, cmd, lock);
-
- op_ret = stub->args.lk_cbk.op_ret;
- errno = stub->args.lk_cbk.op_errno;
- if (op_ret == 0) {
- *lock = stub->args.lk_cbk.lock;
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...)
-{
- int ret = -1;
- struct flock *lock = NULL;
- va_list ap;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (cmd) {
- case F_SETLK:
- case F_SETLKW:
- case F_GETLK:
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (!lock) {
- errno = EINVAL;
- goto out;
- }
-
- ret = libgf_client_lk (ctx, fd, cmd, lock);
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- return ret;
-}
-
-
-int
-libgf_client_chdir (const char *path)
-{
- int op_ret = 0;
- uint32_t resulting_cwd_len = 0;
-
- pthread_mutex_lock (&cwdlock);
- {
- if (!libgf_path_absolute (path)) {
- resulting_cwd_len = strlen (path) + strlen (cwd)
- + ((path[strlen (path) - 1] == '/')
- ? 0 : 1) + 1;
-
- if (resulting_cwd_len > PATH_MAX) {
- op_ret = -1;
- errno = ENAMETOOLONG;
- goto unlock;
- }
- strcat (cwd, path);
- } else {
- resulting_cwd_len = strlen (path)
- + ((path[strlen (path) - 1] == '/')
- ? 0 : 1) + 1;
-
- if (resulting_cwd_len > PATH_MAX) {
- op_ret = -1;
- errno = ENAMETOOLONG;
- goto unlock;
- }
-
- strcpy (cwd, path);
- }
-
- if (cwd[strlen (cwd) - 1] != '/') {
- strcat (cwd, "/");
- }
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- return op_ret;
-}
-
-
-int
-glusterfs_fchdir (glusterfs_file_t fd)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- char vmp[PATH_MAX];
- char *res = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- glusterfs_handle_t handle = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- /* FIXME: there is a race-condition between glusterfs_fchdir and
- glusterfs_close. If two threads of application call glusterfs_fchdir
- and glusterfs_close on the same fd, there is a possibility of
- glusterfs_fchdir accessing freed memory of fd_ctx.
- */
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- handle = fd_ctx->ctx;
- strcpy (vpath, fd_ctx->vpath);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- if (vpath[0] == '\0') {
- errno = ENOTDIR;
- goto out;
- }
-
- res = libgf_vmp_search_vmp (handle, vmp, PATH_MAX);
- if (res == NULL) {
- errno = EBADF;
- goto out;
- }
-
- /* both vmp and vpath are terminated with '/'. Also path starts with a
- '/'. Hence the extra '/' amounts to NULL character at the end of the
- string.
- */
- if ((strlen (vmp) + strlen (vpath)) > PATH_MAX) {
- errno = ENAMETOOLONG;
- goto out;
- }
-
- pthread_mutex_lock (&cwdlock);
- {
- strcpy (cwd, vmp);
- res = vpath;
- if (res[0] == '/') {
- res++;
- }
-
- strcat (cwd, res);
- }
- pthread_mutex_unlock (&cwdlock);
-
- op_ret = 0;
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_chdir (const char *path)
-{
- int32_t op_ret = -1;
- glusterfs_handle_t handle = NULL;
- loc_t loc = {0, };
- char vpath[PATH_MAX];
-
- handle = libgf_resolved_path_handle (path, vpath);
-
- if (handle != NULL) {
- loc.path = strdup (vpath);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction "
- "failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, handle, 0);
- }
-
- if ((handle == NULL) || (op_ret == 0)) {
- op_ret = libgf_client_chdir (path);
- }
-
-out:
- return op_ret;
-}
-
-
-char *
-glusterfs_getcwd (char *buf, size_t size)
-{
- char *res = NULL;
- size_t len = 0;
- loc_t loc = {0, };
- glusterfs_handle_t handle = NULL;
- char vpath[PATH_MAX];
- int32_t op_ret = 0;
-
- pthread_mutex_lock (&cwdlock);
- {
- if (!cwd_inited) {
- errno = ENODEV;
- goto unlock;
- }
-
- if (buf == NULL) {
- buf = CALLOC (1, len);
- if (buf == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "out of memory");
- goto unlock;
- }
- } else {
- if (size == 0) {
- errno = EINVAL;
- goto unlock;
- }
-
- if (len > size) {
- errno = ERANGE;
- goto unlock;
- }
- }
-
- strcpy (buf, cwd);
- res = buf;
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- if (res != NULL) {
- handle = libgf_resolved_path_handle (res, vpath);
-
- if (handle != NULL) {
- loc.path = strdup (vpath);
- if (loc.path == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "strdup failed");
- } else {
- op_ret = libgf_client_path_lookup (&loc, handle,
- 0);
- if (op_ret == -1) {
- res = NULL;
- }
- }
- }
- }
-
- return res;
-}
-
-int32_t
-libgf_client_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_truncate_cbk_stub (frame, NULL, op_ret,
- op_errno, prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_truncate (libglusterfs_client_ctx_t *ctx,
- loc_t *loc, off_t length)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, truncate, local, loc, length);
-
- op_ret = stub->args.truncate_cbk.op_ret;
- errno = stub->args.truncate_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
-
- if (op_ret == -1) {
- goto out;
- }
-
- libgf_transform_iattr (ctx, loc->inode,
- &stub->args.truncate_cbk.postbuf);
-
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.truncate_cbk.postbuf);
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_truncate (glusterfs_handle_t handle, const char *path,
- off_t length)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_truncate (ctx, &loc, length);
-
-out:
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-int
-glusterfs_truncate (const char *path, off_t length)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path:%s length:%"PRIu64, path,
- length);
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_truncate (h, vpath, length);
-out:
- return op_ret;
-}
-
-static struct xlator_fops libgf_client_fops = {
-};
-
-static struct xlator_cbks libgf_client_cbks = {
- .forget = libgf_client_forget,
- .release = libgf_client_release,
- .releasedir = libgf_client_releasedir,
-};
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph)
-{
- int ret = 0;
- xlator_t *top = NULL;
- xlator_list_t *xlchild, *xlparent;
-
- top = CALLOC (1, sizeof (*top));
- ERR_ABORT (top);
-
- xlchild = CALLOC (1, sizeof(*xlchild));
- ERR_ABORT (xlchild);
- xlchild->xlator = graph;
- top->children = xlchild;
- top->ctx = graph->ctx;
- top->next = graph;
- top->name = strdup (LIBGF_XL_NAME);
-
- xlparent = CALLOC (1, sizeof(*xlparent));
- xlparent->xlator = top;
- graph->parents = xlparent;
- ret = asprintf (&top->type, LIBGF_XL_NAME);
- if (-1 == ret) {
- fprintf (stderr, "failed to set the top xl's type");
- }
-
- top->init = libgf_client_init;
- top->fops = &libgf_client_fops;
- top->mops = &libgf_client_mops;
- top->cbks = &libgf_client_cbks;
- top->notify = libgf_client_notify;
- top->fini = libgf_client_fini;
- // fill_defaults (top);
-
- return top;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient.h b/libglusterfsclient/src/libglusterfsclient.h
deleted file mode 100755
index 1691a2faa..000000000
--- a/libglusterfsclient/src/libglusterfsclient.h
+++ /dev/null
@@ -1,1363 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LIBGLUSTERFSCLIENT_H
-#define _LIBGLUSTERFSCLIENT_H
-
-#ifndef __BEGIN_DECLS
-#ifdef __cplusplus
-#define __BEGIN_DECLS extern "C" {
-#else
-#define __BEGIN_DECLS
-#endif
-#endif
-
-#ifndef __END_DECLS
-#ifdef __cplusplus
-#define __END_DECLS }
-#else
-#define __END_DECLS
-#endif
-#endif
-
-
-__BEGIN_DECLS
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <utime.h>
-#include <sys/time.h>
-#include <stdint.h>
-
-typedef struct {
- struct iovec *vector;
- int count;
- void *iobref;
- void *dictref;
-} glusterfs_iobuf_t;
-
-
-typedef
-int (*glusterfs_readv_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- void *cbk_data);
-
-typedef
-int (*glusterfs_write_cbk_t) (int op_ret, int op_errno, void *cbk_data);
-
-typedef
-int (*glusterfs_get_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- struct stat *stbuf, void *cbk_data);
-
-
-/* Data Interface
- * The first section describes the data structures required for
- * using libglusterfsclient.
- */
-
-/* This structure needs to be filled up and
- * passed to the glusterfs_init function which uses
- * the params passed herein to initialize a glusterfs
- * client context and then connect to a glusterfs server.
- */
-typedef struct {
- char *logfile; /* Path to the file which will store
- the log.
- */
- char *loglevel; /* The log level required for
- reporting various events within
- libglusterfsclient.
- */
- struct {
- char *specfile; /* Users can either open a volume or
- specfile and assign the pointer to
- specfp, or just refer to the volume
- /spec file path in specfile.
- */
- FILE *specfp;
- };
- char *volume_name; /* The volume file could describe many
- volumes but the specific volume
- within that file is chosen by
- specifying the volume name here.
- */
- unsigned long lookup_timeout; /* libglusterclient provides the inode
- numbers to be cached by the library.
- The duration for which these are
- cached is defined by lookup_timeout.
- In Seconds.
- */
- unsigned long stat_timeout; /* The file attributes received from
- a stat syscall can also be cached
- for the duration specified in this
- member. In Seconds.
- */
-} glusterfs_init_params_t;
-
-
-
-/* This is the handle returned by glusterfs_init
- * once the initialization is complete.
- * Users should treat this as an opaque handle.
- */
-typedef void * glusterfs_handle_t;
-
-
-
-/* These identifiers are used as handles for files and dirs.
- * Users of libglusterfsclient should not in any way try to interpret
- * the actual structures these will point to.
- */
-typedef void * glusterfs_file_t;
-typedef void * glusterfs_dir_t;
-
-
-/* Function Call Interface */
-/* libglusterfsclient initialization function.
- * @ctx : the structure described above filled with required values.
- * @fakefsid: User generated fsid to be used to identify this
- * volume.
- *
- * Returns NULL on failure and the non-NULL pointer on success.
- * On failure, the error description might be present in the logfile
- * depending on the log level.
- */
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *ctx, uint32_t fakefsid);
-
-
-
-/* Used to destroy a glusterfs client context and the
- * connection to the glusterfs server.
- *
- * @handle : The glusterfs handle returned by glusterfs_init.
- */
-int
-glusterfs_fini (glusterfs_handle_t handle);
-
-
-
-/* libglusterfs client provides two interfaces.
- * 1. handle-based interface
- * Functions that comprise the handle-based interface accept the
- * glusterfs_handle_t as the first argument. It specifies the
- * glusterfs client context over which to perform the operation.
- *
- * 2. Virtual Mount Point based interface:
- * Functions that do not require a handle to be given in order to
- * identify which client context to operate on. This interface
- * internally determines the corresponding client context for the
- * given path. The down-side is that a virtual mount point (VMP) needs to be
- * registered with the library. A VMP is just a string that maps to a
- * glusterfs_handle_t. The advantage of a VMP based interface is that
- * a user program using multiple client contexts does not need to
- * maintain its own mapping between paths and the corresponding
- * handles.
- */
-
-
-
-/* glusterfs_mount is the function that allows users to register a VMP
- * along with the parameters, which will be used to initialize a
- * context. Applications calling glusterfs_mount do not need to
- * initialize a context using the glusterfs_init interface.
- *
- * @vmp : The virtual mount point.
- * @ipars : Initialization parameters populated as described
- * earlier.
- *
- * Returns 0 on success, and -1 on failure.
- */
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars);
-
-
-
-/* glusterfs_umount is the VMP equivalent of glusterfs_fini.
- *
- * @vmp : The VMP which was initialized using glusterfs_mount.
- *
- * Returns 0 on success, and -1 on failure.
- */
-int
-glusterfs_umount (char *vmp);
-
-
-/* glusterfs_umount_all unmounts all the mounts */
-int
-glusterfs_umount_all (void);
-
-
-/* For smaller files, application can use just
- * glusterfs_get/glusterfs_get_async
- * to read the whole content. Limit of the file-sizes to be read in
- * glusterfs_get/glusterfs_get_async is passed in the size argument
- */
-
-/* glusterfs_glh_get:
- * @handle : glusterfs handle
- * @path : path to be looked upon
- * @size : upper limit of file-sizes to be read in lookup
- * @stbuf : attribute buffer
- */
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf);
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf);
-
-int
-glusterfs_get_async (glusterfs_handle_t handle, const char *path, size_t size,
- glusterfs_get_cbk_t cbk, void *cbk_data);
-
-
-
-/* Opens a file. Corresponds to the open syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file or directory on the glusterfs
- * export. Must be absolute to the export on the server.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns a non-NULL handle on success. NULL on failure and sets
- * errno accordingly.
- */
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,
- ...);
-
-
-/* Opens a file without having to specify a handle.
- *
- * @path : Path to the file to open in the glusterfs export.
- * The path to the file in glusterfs export must be
- * pre-fixed with the VMP string registered with
- * glusterfs_mount.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno set
- * accordingly.
- */
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...);
-
-
-
-/* Creates a file. Corresponds to the creat syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file that needs to be created in the
- * glusterfs export.
- * @mode : File creation mode.
- *
- * Returns the file handle on success. NULL on error with errno set as
- * required.
- */
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* VMP-based creat.
- * @path : Path to the file to be created. Must be
- * prefixed with the VMP string registered with
- * glusterfs_mount.
- * @mode : File creation mode.
- *
- * Returns file handle on success. NULL handle on error with errno set
- * accordingly.
- */
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode);
-
-
-
-/* Close the file identified by the handle.
- *
- * @fd : Closes the file.
- *
- * Returns 0 on success, -1 on error with errno set accordingly.
- */
-int
-glusterfs_close (glusterfs_file_t fd);
-
-
-
-/* Get struct stat for the file in path.
- *
- * @handle : The handle that identifies a glusterfs client
- * context.
- * @path : The file for which we need to get struct stat.
- * @stbuf : The buffer into which the file's stat is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *stbuf);
-
-
-/* Get struct stat for file in path.
- *
- * @path : The file for which struct stat is required.
- * @buf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_stat (const char *path, struct stat *buf);
-
-
-
-/* Gets stat struct for the file.
- *
- * @handle : The handle identifying a glusterfs client context.
- * @path : Path to the file for which stat structure is
- * required. If path is a symlink, the symlink is
- * interpreted and the stat structure returned for the
- * target of the link.
- * @buf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path,
- struct stat *buf);
-
-
-
-/* Gets stat struct for a file.
- *
- * @path : The file to get the struct stat for.
- * @buf : The receiving struct stat buffer.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_lstat (const char *path, struct stat *buf);
-
-
-
-/* Get stat structure for a file.
- *
- * @fd : The file handle identifying a file on the glusterfs
- * server.
- * @stbuf : The buffer into which the stat data is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_fstat (glusterfs_file_t fd, struct stat *stbuf);
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags);
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd, char *list, size_t size);
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd, const char *name);
-
-
-
-/* Read data from a file.
- * @fd : Handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- *
- * Returns number of bytes actually read on success or -1 on error
- * with errno set to the appropriate error number.
- */
-ssize_t
-glusterfs_read (glusterfs_file_t fd, void *buf, size_t nbytes);
-
-
-
-/* Read data into an array of buffers.
- *
- * @fd : File handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @vec : Array of buffers into which the data is read.
- * @count : Number of iovecs referred to by vec.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count);
-
-int
-glusterfs_read_async (glusterfs_file_t fd, size_t nbytes, off_t offset,
- glusterfs_readv_cbk_t readv_cbk, void *cbk_data);
-
-
-
-/* Write data into a file.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer which is written to the file.
- * @nbytes : Number bytes of the @buf written to the file.
- *
- * On success, returns number of bytes written. On error, returns -1
- * with errno set appropriately.
- */
-ssize_t
-glusterfs_write (glusterfs_file_t fd, const void *buf, size_t nbytes);
-
-
-
-/* Writes an array of buffers into a file.
- *
- * @fd : The file handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @vector : Array of buffers to be written to the file.
- * @count : Number of separate buffers in the @vector array.
- *
- * Returns number of bytes written on success or -1 on error with
- * errno set appropriately.
- */
-ssize_t
-glusterfs_writev (glusterfs_file_t fd, const struct iovec *vector, int count);
-
-int
-glusterfs_write_async (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset, glusterfs_write_cbk_t write_cbk,
- void *cbk_data);
-
-int
-glusterfs_writev_async (glusterfs_file_t fd, const struct iovec *vector,
- int count, off_t offset,
- glusterfs_write_cbk_t write_cbk, void *cbk_data);
-
-
-
-/* Read from a file starting at a given offset.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- * @offset : The offset to start reading @nbytes from.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_pread (glusterfs_file_t fd, void *buf, size_t nbytes, off_t offset);
-
-
-
-/* Write to a file starting at a given offset.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer that will be written to the file.
- * @nbytes : Number of bytes to write from @buf.
- * @offset : The starting offset from where @nbytes will be
- * written.
- *
- * Returns number of bytes written on success and -1 on error with
- * errno set appropriately.
- */
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset);
-
-
-
-/* Seek to an offset in the file.
- *
- * @fd : File handle in which to seek to. File handle
- * returned by glusterfs_open or glusterfs_glh_open.
- * @offset : Offset to seek to in the given file.
- * @whence : Determines how the offset is interpreted by this
- * syscall. The behaviour is similar to the options
- * provided by the POSIX lseek system call. See man lseek
- * for more details.
- *
- * On success, returns the resulting absolute offset in the file after the seek
- * operation is performed. On error, returns -1 with errno set
- * appropriately.
- */
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence);
-
-
-
-/* Create a directory.
- *
- * @handle : The handle of the glusterfs context in which the
- * directory needs to be created.
- * @path : The absolute path within the glusterfs context where
- * the directory needs to be created.
- * @mode : The mode bits for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Create a directory.
- *
- * @path : Path to the directory that needs to be created. This
- * path must be prefixed with the VMP of the particular glusterfs
- * context.
- * @mode : Mode flags for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkdir (const char *path, mode_t mode);
-
-
-
-/* Remove a directory.
- *
- * @handle : Handle of the glusterfs context from which to remove
- * the directory.
- * @path : The path of the directory to be removed in the glusterfs
- * context.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a directory.
- *
- * @path : The absolute path to the directory to be removed.
- * This path must be pre-fixed with the VMP of the
- * particular glusterfs context in which this directory
- * resides.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rmdir (const char *path);
-
-
-
-/* Read directory entries.
- *
- * @dirfd : The handle of the directory to be read. This handle
- * is the one returned by opendir.
- *
- * Returns the directory entry on success and NULL pointer on error
- * with errno set appropriately.
- */
-void *
-glusterfs_readdir (glusterfs_dir_t dirfd);
-
-
-
-/* re-entrant version of glusterfs_readdir.
- *
- * @dirfd : The handle of directory to be read. This handle is the one
- * returned by opendir.
- * @entry : Pointer to storage to store a directory entry. The storage
- * pointed to by entry shall be large enough for a dirent with
- * an array of char d_name members containing at least
- * {NAME_MAX}+1 elements.
- * @result : Upon successful return, the pointer returned at *result shall
- * have the same value as the argument entry. Upon reaching the
- * end of the directory stream, this pointer shall have the
- * value NULL.
- */
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result);
-
-/* Close a directory handle.
- *
- * @dirfd : The directory handle to be closed.
- *
- * Returns 0 on success and -1 on error with errno set to 0.
- */
-int
-glusterfs_closedir (glusterfs_dir_t dirfd);
-/* FIXME: remove getdents */
-int
-glusterfs_getdents (glusterfs_dir_t fd, struct dirent *dirp,
- unsigned int count);
-
-
-
-/* Create device node.
- *
- * @handle : glusterfs context in which to create the device
- * node.
- * @pathname : The absolute path of the device to be created in the
- * given glusterfs context.
- *
- * @mode : Mode flags to apply to the newly created node.
- * @dev : Device numbers that will apply to the node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *pathname,
- mode_t mode, dev_t dev);
-
-
-
-/* Create a device node.
- *
- * @pathname : The full path of the node to be created. This path
- * should be pre-pended with the VMP of the glusterfs
- * context in which this node is to be created.
- * @mode : Mode flags that will be applied to the newly created
- * device file.
- * @dev : The device numbers that will be associated with the
- * device node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev);
-
-
-
-/* Returns the real absolute path of the given path.
- *
- * @handle : The glusterfs context in which the path resides in.
- * @path : The path to be resolved.
- * @resolved_path : The resolved path is stored in this buffer
- * provided by the caller.
- *
- * Returns a pointer to resolved_path on success and NULL on error
- * with errno set appropriately.
- *
- * See man realpath for details.
- */
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path);
-
-
-/* Returns the real absolute path of the given path.
- *
- * @path : The path to be resolved. This path must be
- * pre-fixed with the VMP of the glusterfs
- * context in which the file resides.
- *
- * @resolved_path : The resolved path is stored in this user
- * provided buffer.
- *
- * Returns a pointer to resolved_path on success, and NULL on error
- * with errno set appropriately.
- */
-char *
-glusterfs_realpath (const char *path, char *resolved_path);
-
-
-
-/* Change mode flags on a path.
- *
- * @handle : Handle of the glusterfs instance in which the path
- * resides.
- * @path : The path whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Change mode flags on a path.
- *
- * @path : The path whose mode bits need to be changed. The
- * path should be pre-fixed with the VMP that identifies the
- * glusterfs context within which the path resides.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chmod (const char *path, mode_t mode);
-
-
-
-/* Change the owner of a path.
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- *
- * @handle : Handle of the glusterfs context in which the path
- * resides.
- * @path : The path whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of a path.
- *
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- * @path : The path whose owner needs to be changed. Path must
- * be pre-fixed with the VMP that identifies the
- * glusterfs context in which the path resides.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Change the owner of the file.
- *
- * @fd : Handle of the file whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t owner, gid_t group);
-
-
-
-/* Open a directory.
- *
- * @handle : Handle that identifies a glusterfs context.
- * @path : Path to the directory in the glusterfs context.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Open a directory.
- *
- * @path : Path to the directory. The path must be prepended
- * with the VMP in order to identify the glusterfs
- * context in which path resides.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_opendir (const char *path);
-
-
-
-/* Change the mode bits on an open file.
- *
- * @fd : The file whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode);
-
-
-
-/* Sync the file contents to storage.
- *
- * @fd : The file whose contents need to be sync'ed to
- * storage.
- *
- * Return 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fsync (glusterfs_file_t *fd);
-
-
-
-/* Truncate an open file.
- *
- * @fd : The file to truncate.
- * @length : The length to truncate to.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length);
-
-
-
-/* Create a hard link between two paths.
- *
- * @handle : glusterfs context in which both paths should reside.
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a hard link between two paths.
- *
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Both paths should exist on the same glusterfs context and should be
- * prefixed with the same VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_link (const char *oldpath, const char *newpath);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file resides.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statfs (const char *path, struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file resides.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utimes (const char *path, const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf);
-
-
-
-/* Create FIFO at the given path.
- *
- * @handle : The glusterfs context in which to create that FIFO.
- * @path : The path within the context where the FIFO is to be
- * created.
- * @mode : The mode bits for the newly created FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path,
- mode_t mode);
-
-
-
-/* Create FIFO at the given path.
- *
- * @path : The path within the context where the FIFO is to be
- * created. @path should begin with the VMP of the
- * glusterfs context in which the FIFO needs to be
- * created.
- * @mode : The mode bits for the newly created FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkfifo (const char *path, mode_t mode);
-
-
-
-/* Unlink a file.
- *
- * @handle : Handle that identifies a glusterfs instance.
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Unlink a file.
- *
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_unlink (const char *path);
-
-
-
-/* Create a symbolic link.
- *
- * @handle : The handle identifying the glusterfs context.
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a symbolic link.
- *
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_symlink (const char *oldpath, const char *newpath);
-
-
-
-/* Read a symbolic link.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize);
-
-
-
-/* Read a symbolic link.
- *
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize);
-
-
-
-/* Rename a file or directory.
- *
- * @handle : The identifier of a glusterfs context.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Rename a file or directory.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rename (const char *oldpath, const char *newpath);
-
-
-
-/* Remove a file or directory in the given glusterfs context.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : Path of the file or directory to be removed.
- *
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a file or directory.
- *
- * @path : Path of the file or directory to be removed. The
- * path must be pre-fixed with the VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_remove (const char *path);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @handle : Handle identifying the glusterfs client context.
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Rewind directory stream pointer to beginning of the directory.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns no value.
- */
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd);
-
-
-
-/* Seek to the given offset in the directory handle.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- * @offset : The offset to seek to.
- *
- * Returns no value.
- */
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset);
-
-
-
-/* Return the current offset in a directory stream.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns the offset in the directory or -1 on error with errno set
- * appropriately.
- */
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd);
-
-
-/* Write count bytes from in_fd to out_fd, starting at *offset.
- * glusterfs_sendfile aims at eliminating memory copy at the end of
- * each read from in_fd, copying the file directly to out_fd from the buffer
- * provided by glusterfs.
- *
- * @out_fd: file descriptor opened for writing
- *
- * @in_fd: glusterfs file handle to the file to be read from.
- *
- * @offset: If offset is not NULL, then it points to a variable holding the file
- * offset from which glusterfs_sendfile() will start reading data
- * from in_fd. When glusterfs_sendfile() returns, this variable will
- * be set to the offset of the byte following the last byte that was
- * read. If offset is not NULL, then glusterfs_sendfile() does not
- * modify the current file offset of in_fd; otherwise the current file
- * offset is adjusted to reflect the number of bytes read from in_fd.
- *
- * @count: number of bytes to copy between the file descriptors.
- */
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count);
-
-/* manipulate file descriptor
- * This api can have 3 forms similar to fcntl(2).
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, long arg)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, struct flock *lock)
- *
- * @fd : file handle returned by glusterfs_open or glusterfs_create.
- * @cmd : Though the aim is to implement all possible commands supported by
- * fcntl(2), currently following commands are supported.
- * F_SETLK, F_SETLKW, F_GETLK - used to acquire, release, and test for
- * the existence of record locks (also
- * known as file-segment or file-region
- * locks). More detailed explanation is
- * found in 'man 2 fcntl'
- */
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...);
-
-/*
- * Change the current working directory to @path
- *
- * @path : path to change the current working directory to.
- *
- * Returns 0 on success and -1 on failure with errno set appropriately.
- */
-int
-glusterfs_chdir (const char *path);
-
-/*
- * Change the current working directory to the path @fd is opened on.
- *
- * @fd : current working directory will be changed to path @fd is opened on.
- *
- * Returns 0 on success and -1 on with errno set appropriately.
- */
-int
-glusterfs_fchdir (glusterfs_file_t fd);
-
-/* copies the current working directory into @buf if it is big enough
- *
- * @buf: buffer to copy into it. If @buf is NULL, a buffer will be allocated.
- * The size of the buffer will be @size if it is not zero, otherwise the
- * size will be big enough to hold the current working directory.
- * @size: size of the buffer.
- *
- * Returns the pointer to buffer holding current working directory on success
- * and NULL on failure.
- */
-
-char *
-glusterfs_getcwd (char *buf, size_t size);
-
-/*
- * Truncate the file to a specified length.
- *
- * @path : path to the file.
- * @length : length to which the file has to be truncated.
- *
- * Returns 0 on success and -1 on failure with errno set appropriately
- */
-
-int
-glusterfs_truncate (const char *path, off_t length);
-
-
-/* FIXME: review the need for these apis */
-/* added for log related initialization in booster fork implementation */
-void
-glusterfs_reset (void);
-
-void
-glusterfs_log_lock (void);
-
-void
-glusterfs_log_unlock (void);
-/* Used to free the glusterfs_read_buf passed to the application from
- glusterfs_read_async_cbk
-*/
-void
-glusterfs_free (glusterfs_iobuf_t *buf);
-
-__END_DECLS
-
-#endif /* !_LIBGLUSTERFSCLIENT_H */
diff --git a/mod_glusterfs/Makefile.am b/mod_glusterfs/Makefile.am
deleted file mode 100644
index 0abe8dcfc..000000000
--- a/mod_glusterfs/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = apache lighttpd
-
-CLEANFILES =
diff --git a/mod_glusterfs/apache/1.3/src/Makefile.am b/mod_glusterfs/apache/1.3/src/Makefile.am
deleted file mode 100644
index 6bb3075f5..000000000
--- a/mod_glusterfs/apache/1.3/src/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/1.3
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs-build.c
- $(APXS) -c -Wc,-g3 -Wc,-O0 -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfsclient/src -Wl,-rpath,$(libdir) -Wl,-rpath,$(top_builddir)/libglusterfsclient/src/.libs/ $(top_builddir)/libglusterfsclient/src/.libs/libglusterfsclient.so mod_glusterfs-build.c -o $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- -rm -fv *.so *.o mod_glusterfs-build.c
diff --git a/mod_glusterfs/apache/1.3/src/README.txt b/mod_glusterfs/apache/1.3/src/README.txt
deleted file mode 100644
index 378a51d79..000000000
--- a/mod_glusterfs/apache/1.3/src/README.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installation of apache-1.3.x, where x is any minor version.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to the way apache has
-been built, currently following points have to be taken care of:
-
-* module "so" has to be enabled, for apache to support modules.
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzvf apache-1.3.9.tar.gz
- $ cd apache-1.3.9/
- $ # add -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 to EXTRA_CFLAGS in src/Configuration.
- $ ./configure --prefix=/usr --enable-module=so
- $ cd src
- $ ./Configure
- $ cd ../
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n mod_glusterfs -ia /usr/lib/glusterfs/1.4.0pre2/apache-1.3/mod_glusterfs.so
-**********************************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is multithreaded.
- If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
-
-TODO
-====
-* directory listing for the directories accessed through mod_glusterfs.
diff --git a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c b/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
deleted file mode 100644
index c1380a4fd..000000000
--- a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glusterfs: Unrecognized log-level "\
- "\"%s\", possible values are \"DEBUG|"\
- "WARNING|ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-module MODULE_VAR_EXPORT glusterfs_module;
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
- size_t xattr_file_size;
- uint32_t cache_timeout;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-static glusterfs_dir_config_t *
-mod_glusterfs_dconfig(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-static
-const char *add_xattr_file_size(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_cache_timeout(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_loglevel(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = arg;
-
- return error;
-}
-
-static
-const char *add_logfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = arg;
-
- return NULL;
-}
-
-static
-const char *add_specfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = arg;
-
- return NULL;
-}
-
-static void *
-mod_glusterfs_create_dir_config(pool *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) ap_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- return (void *) dir_config;
-}
-
-static void
-mod_glusterfs_child_init(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = ap_get_module_config (s->module_config,
- &core_module);
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t params = {0, };
-
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
-
- if (dir_config) {
- memset (&params, 0, sizeof (params));
-
- params.logfile = dir_config->logfile;
- params.loglevel = dir_config->loglevel;
- params.lookup_timeout = dir_config->cache_timeout;
- params.stat_timeout = dir_config->cache_timeout;
- params.specfile = dir_config->specfile;
-
- glusterfs_mount (dir_config->mount_dir, &params);
- }
- dir_config = NULL;
- }
-}
-
-static void
-mod_glusterfs_child_exit(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
-
- mod_core_config = ap_get_module_config (s->module_config, &core_module);
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static int mod_glusterfs_fixup(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int access_status;
- int ret;
- char *path = NULL;
-
- dir_config = mod_glusterfs_dconfig(r);
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (ap_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL),
- r->uri, strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = ap_pstrdup (r->pool, GLUSTERFS_HANDLER);
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &r->finfo);
-
- if (ret == -1 || r->finfo.st_size > dir_config->xattr_file_size
- || S_ISDIR (r->finfo.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, r,
- "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- if (r->uri && strlen (r->uri) && r->uri[strlen(r->uri) - 1] == '/')
- r->handler = NULL;
-
- r->filename = ap_pstrcat (r->pool, r->filename, r->path_info, NULL);
-
- if ((access_status = ap_find_types(r)) != 0) {
- return DECLINED;
- }
-
- return OK;
-}
-
-
-int
-mod_glusterfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glusterfs_read_async (request_rec *r, glusterfs_file_t fd, off_t offset,
- off_t length)
-{
- glusterfs_async_local_t local;
- off_t end;
- int nbytes;
- int complete;
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- for (i = 0; i < buf->count; i++) {
- if (ap_rwrite (buf->vector[i].iov_base,
- buf->vector[i].iov_len, r) < 0) {
- local.op_ret = -1;
- complete = 1;
- break;
- }
- }
-
- glusterfs_free (buf);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? SERVER_ERROR : OK);
-}
-
-static int
-mod_glusterfs_handler(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config;
- char *path = NULL;
- int error = OK;
- int rangestatus = 0;
- int errstatus = OK;
- glusterfs_file_t fd;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] == '/') {
- return DECLINED;
- }
-
- dir_config = mod_glusterfs_dconfig (r);
-
- if (r->method_number != M_GET) {
- return METHOD_NOT_ALLOWED;
- }
-
- ap_update_mtime(r, r->finfo.st_mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- ap_table_setn(r->headers_out, "Accept-Ranges", "bytes");
- if (((errstatus = ap_meets_conditions(r)) != OK)
- || (errstatus = ap_set_content_length(r, r->finfo.st_size))) {
- return errstatus;
- }
- rangestatus = ap_set_byterange(r);
- ap_send_http_header(r);
-
- if (r->finfo.st_size <= dir_config->xattr_file_size && dir_config->buf) {
- if (!r->header_only) {
- error = OK;
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, r,
- "fetching data from glusterfs through "
- "xattr interface\n");
-
- if (!rangestatus) {
- if (ap_rwrite (dir_config->buf,
- r->finfo.st_size, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- } else {
- long offset, length;
- while (ap_each_byterange (r, &offset, &length)) {
- if (ap_rwrite (dir_config->buf + offset,
- length, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- break;
- }
- }
- }
- }
-
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- return error;
- }
-
- path = r->uri;
- fd = glusterfs_open (path , O_RDONLY, 0);
-
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
- "file permissions deny server access: %s",
- r->filename);
- return FORBIDDEN;
- }
-
- if (!r->header_only) {
- if (!rangestatus) {
- mod_glusterfs_read_async (r, fd, 0, -1);
- } else {
- long offset, length;
- while (ap_each_byterange(r, &offset, &length)) {
- mod_glusterfs_read_async (r, fd, offset, length);
- }
- }
- }
-
- glusterfs_close (fd);
- return error;
-}
-
-static const command_rec mod_glusterfs_cmds[] =
-{
- {"GlusterfsLogfile", add_logfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Logfile"},
- {"GlusterfsLoglevel", set_loglevel, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Loglevel:anyone of none, critical, error, warning, debug"},
- {"GlusterfsCacheTimeout", set_cache_timeout, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Timeout value in seconds for caching lookups and stats"},
- {"GlusterfsVolumeSpecfile", add_specfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Specfile required to access contents of this directory"},
- {"GlusterfsXattrFileSize", add_xattr_file_size, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Maximum size of the file to be fetched using xattr interface of "
- "glusterfs"},
- {NULL}
-};
-
-static const handler_rec mod_glusterfs_handlers[] =
-{
- {GLUSTERFS_HANDLER, mod_glusterfs_handler},
- {NULL}
-};
-
-module glusterfs_module =
-{
- STANDARD_MODULE_STUFF,
- NULL,
- mod_glusterfs_create_dir_config, /* per-directory config creator */
- NULL,
- NULL, /* server config creator */
- NULL, /* server config merger */
- mod_glusterfs_cmds, /* command table */
- mod_glusterfs_handlers, /* [7] list of handlers */
- NULL, /* [2] filename-to-URI translation */
- NULL, /* [5] check/validate user_id */
- NULL, /* [6] check user_id is valid *here* */
- NULL, /* [4] check access by host address */
- NULL, /* [7] MIME type checker/setter */
- mod_glusterfs_fixup, /* [8] fixups */
- NULL, /* [10] logger */
-#if MODULE_MAGIC_NUMBER >= 19970103
- NULL, /* [3] header parser */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970719
- mod_glusterfs_child_init, /* process initializer */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970728
- mod_glusterfs_child_exit, /* process exit/cleanup */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970902
- NULL /* [1] post read_request handling */
-#endif
-};
diff --git a/mod_glusterfs/apache/2.2/src/Makefile.am b/mod_glusterfs/apache/2.2/src/Makefile.am
deleted file mode 100644
index 1e8f3a31e..000000000
--- a/mod_glusterfs/apache/2.2/src/Makefile.am
+++ /dev/null
@@ -1,31 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/2.2
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/2.2/src/mod_glusterfs-build.c
- $(APXS) -c -o mod_glusterfs.la -Wc,-g3 -Wc,-O0 -DLINUX=2 -D_REENTRANT -D_GNU_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -I$(top_srcdir)/libglusterfsclient/src -L$(top_builddir)/libglusterfsclient/src/.libs/ -lglusterfsclient mod_glusterfs-build.c
- -ln -sf .libs/mod_glusterfs.so mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- rm -fv *.so *.o
diff --git a/mod_glusterfs/apache/2.2/src/README.txt b/mod_glusterfs/apache/2.2/src/README.txt
deleted file mode 100644
index 214a2535b..000000000
--- a/mod_glusterfs/apache/2.2/src/README.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installing mod_glusterfs for httpd-2.2 and higher.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to
-the way apache has been built, currently following points have to be taken care of:
-
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzf httpd-2.2.10.tar.gz
- $ cd httpd-2.2.10/
- $ export CFLAGS='-D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64'
- $ ./configure --prefix=/usr
- $ make
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version
- to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n glusterfs -ia /usr/lib/glusterfs/2.0.0rc4/apache/2.2/mod_glusterfs.so
-**********************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-* mod_glusterfs also implements mod_dir and mod_autoindex behaviour for files under glusterfs mount.
- Hence it also takes the directives related to both of these modules. For more details, refer the
- documentation for both of these modules.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is
- multithreaded. If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
diff --git a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c b/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
deleted file mode 100644
index d2b9f3232..000000000
--- a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
+++ /dev/null
@@ -1,3627 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#ifndef NO_CONTENT_TYPE
-#define NO_CONTENT_TYPE "none"
-#endif
-
-#define BYTERANGE_FMT "%" APR_OFF_T_FMT "-%" APR_OFF_T_FMT "/%" APR_OFF_T_FMT
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <util_filter.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-#include <apr.h>
-#include <apr_strings.h>
-#include <apr_buckets.h>
-#include <apr_fnmatch.h>
-#include <apr_lib.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glfs: Unrecognized log-level \"%s\", "\
- " possible values are \"DEBUG|WARNING|"\
- "ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-module AP_MODULE_DECLARE_DATA glusterfs_module;
-extern module core_module;
-
-#define NO_OPTIONS (1 << 0) /* Indexing options */
-#define ICONS_ARE_LINKS (1 << 1)
-#define SCAN_HTML_TITLES (1 << 2)
-#define SUPPRESS_ICON (1 << 3)
-#define SUPPRESS_LAST_MOD (1 << 4)
-#define SUPPRESS_SIZE (1 << 5)
-#define SUPPRESS_DESC (1 << 6)
-#define SUPPRESS_PREAMBLE (1 << 7)
-#define SUPPRESS_COLSORT (1 << 8)
-#define SUPPRESS_RULES (1 << 9)
-#define FOLDERS_FIRST (1 << 10)
-#define VERSION_SORT (1 << 11)
-#define TRACK_MODIFIED (1 << 12)
-#define FANCY_INDEXING (1 << 13)
-#define TABLE_INDEXING (1 << 14)
-#define IGNORE_CLIENT (1 << 15)
-#define IGNORE_CASE (1 << 16)
-#define EMIT_XHTML (1 << 17)
-#define SHOW_FORBIDDEN (1 << 18)
-
-#define K_NOADJUST 0
-#define K_ADJUST 1
-#define K_UNSET 2
-
-/*
- * Define keys for sorting.
- */
-#define K_NAME 'N' /* Sort by file name (default) */
-#define K_LAST_MOD 'M' /* Last modification date */
-#define K_SIZE 'S' /* Size (absolute, not as displayed) */
-#define K_DESC 'D' /* Description */
-#define K_VALID "NMSD" /* String containing _all_ valid K_ opts */
-
-#define D_ASCENDING 'A'
-#define D_DESCENDING 'D'
-#define D_VALID "AD" /* String containing _all_ valid D_ opts */
-
-/*
- * These are the dimensions of the default icons supplied with Apache.
- */
-#define DEFAULT_ICON_WIDTH 20
-#define DEFAULT_ICON_HEIGHT 22
-
-/*
- * Other default dimensions.
- */
-#define DEFAULT_NAME_WIDTH 23
-#define DEFAULT_DESC_WIDTH 23
-
-struct mod_glfs_ai_item {
- char *type;
- char *apply_to;
- char *apply_path;
- char *data;
-};
-
-typedef struct mod_glfs_ai_desc_t {
- char *pattern;
- char *description;
- int full_path;
- int wildcards;
-} mod_glfs_ai_desc_t;
-
-typedef enum {
- SLASH_OFF = 0,
- SLASH_ON,
- SLASH_UNSET
-} mod_glfs_dir_slash_cfg;
-
-/* static ap_filter_rec_t *mod_glfs_output_filter_handle; */
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
-
- size_t xattr_file_size;
- uint32_t cache_timeout;
-
- /* mod_dir options */
- apr_array_header_t *index_names;
- mod_glfs_dir_slash_cfg do_slash;
-
- /* autoindex options */
- char *default_icon;
- char *style_sheet;
- apr_int32_t opts;
- apr_int32_t incremented_opts;
- apr_int32_t decremented_opts;
- int name_width;
- int name_adjust;
- int desc_width;
- int desc_adjust;
- int icon_width;
- int icon_height;
- char default_keyid;
- char default_direction;
-
- apr_array_header_t *icon_list;
- apr_array_header_t *alt_list;
- apr_array_header_t *desc_list;
- apr_array_header_t *ign_list;
- apr_array_header_t *hdr_list;
- apr_array_header_t *rdme_list;
-
- char *ctype;
- char *charset;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-
-static glusterfs_dir_config_t *
-mod_glfs_dconfig (request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-
-static const char *
-cmd_add_xattr_file_size (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_cache_timeout (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_loglevel (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = apr_pstrdup (cmd->pool, arg);
-
- return error;
-}
-
-static const char *
-cmd_add_logfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-
-static const char *
-cmd_add_volume_specfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-#define WILDCARDS_REQUIRED 0
-
-static const char *
-cmd_add_desc (cmd_parms *cmd, void *d, const char *desc,
- const char *to)
-{
- glusterfs_dir_config_t *dcfg = NULL;
- mod_glfs_ai_desc_t *desc_entry = NULL;
- char *prefix = "";
-
- dcfg = (glusterfs_dir_config_t *) d;
- desc_entry = (mod_glfs_ai_desc_t *) apr_array_push(dcfg->desc_list);
- desc_entry->full_path = (ap_strchr_c(to, '/') == NULL) ? 0 : 1;
- desc_entry->wildcards = (WILDCARDS_REQUIRED
- || desc_entry->full_path
- || apr_fnmatch_test(to));
- if (desc_entry->wildcards) {
- prefix = desc_entry->full_path ? "*/" : "*";
- desc_entry->pattern = apr_pstrcat(dcfg->desc_list->pool,
- prefix, to, "*", NULL);
- }
- else {
- desc_entry->pattern = apr_pstrdup(dcfg->desc_list->pool, to);
- }
- desc_entry->description = apr_pstrdup(dcfg->desc_list->pool, desc);
- return NULL;
-}
-
-
-static void push_item(apr_array_header_t *arr, char *type, const char *to,
- const char *path, const char *data)
-{
- struct mod_glfs_ai_item *p = NULL;
-
- p = (struct mod_glfs_ai_item *) apr_array_push(arr);
-
- if (!to) {
- to = "";
- }
- if (!path) {
- path = "";
- }
-
- p->type = type;
- p->data = data ? apr_pstrdup(arr->pool, data) : NULL;
- p->apply_path = apr_pstrcat(arr->pool, path, "*", NULL);
-
- if ((type == BY_PATH) && (!ap_is_matchexp(to))) {
- p->apply_to = apr_pstrcat(arr->pool, "*", to, NULL);
- }
- else if (to) {
- p->apply_to = apr_pstrdup(arr->pool, to);
- }
- else {
- p->apply_to = NULL;
- }
-}
-
-
-static const char *
-cmd_add_ignore (cmd_parms *cmd, void *d, const char *ext)
-{
- push_item(((glusterfs_dir_config_t *) d)->ign_list, 0, ext, cmd->path,
- NULL);
- return NULL;
-}
-
-
-static const char *
-cmd_add_header (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->hdr_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_readme (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->rdme_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_opts (cmd_parms *cmd, void *d, int argc, char *const argv[])
-{
- int i = 0, option = 0;
- char *w = NULL;
- apr_int32_t opts;
- apr_int32_t opts_add;
- apr_int32_t opts_remove;
- char action = 0;
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) d;
-
- opts = d_cfg->opts;
- opts_add = d_cfg->incremented_opts;
- opts_remove = d_cfg->decremented_opts;
-
- for (i = 0; i < argc; i++) {
- w = argv[i];
-
- if ((*w == '+') || (*w == '-')) {
- action = *(w++);
- }
- else {
- action = '\0';
- }
- if (!strcasecmp(w, "FancyIndexing")) {
- option = FANCY_INDEXING;
- }
- else if (!strcasecmp(w, "FoldersFirst")) {
- option = FOLDERS_FIRST;
- }
- else if (!strcasecmp(w, "HTMLTable")) {
- option = TABLE_INDEXING;
- }
- else if (!strcasecmp(w, "IconsAreLinks")) {
- option = ICONS_ARE_LINKS;
- }
- else if (!strcasecmp(w, "IgnoreCase")) {
- option = IGNORE_CASE;
- }
- else if (!strcasecmp(w, "IgnoreClient")) {
- option = IGNORE_CLIENT;
- }
- else if (!strcasecmp(w, "ScanHTMLTitles")) {
- option = SCAN_HTML_TITLES;
- }
- else if (!strcasecmp(w, "SuppressColumnSorting")) {
- option = SUPPRESS_COLSORT;
- }
- else if (!strcasecmp(w, "SuppressDescription")) {
- option = SUPPRESS_DESC;
- }
- else if (!strcasecmp(w, "SuppressHTMLPreamble")) {
- option = SUPPRESS_PREAMBLE;
- }
- else if (!strcasecmp(w, "SuppressIcon")) {
- option = SUPPRESS_ICON;
- }
- else if (!strcasecmp(w, "SuppressLastModified")) {
- option = SUPPRESS_LAST_MOD;
- }
- else if (!strcasecmp(w, "SuppressSize")) {
- option = SUPPRESS_SIZE;
- }
- else if (!strcasecmp(w, "SuppressRules")) {
- option = SUPPRESS_RULES;
- }
- else if (!strcasecmp(w, "TrackModified")) {
- option = TRACK_MODIFIED;
- }
- else if (!strcasecmp(w, "VersionSort")) {
- option = VERSION_SORT;
- }
- else if (!strcasecmp(w, "XHTML")) {
- option = EMIT_XHTML;
- }
- else if (!strcasecmp(w, "ShowForbidden")) {
- option = SHOW_FORBIDDEN;
- }
- else if (!strcasecmp(w, "None")) {
- if (action != '\0') {
- return "Cannot combine '+' or '-' with 'None' "
- "keyword";
- }
- opts = NO_OPTIONS;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (!strcasecmp(w, "IconWidth")) {
- if (action != '-') {
- d_cfg->icon_width = DEFAULT_ICON_WIDTH;
- }
- else {
- d_cfg->icon_width = 0;
- }
- }
- else if (!strncasecmp(w, "IconWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with IconWidth=n";
- }
- d_cfg->icon_width = atoi(&w[10]);
- }
- else if (!strcasecmp(w, "IconHeight")) {
- if (action != '-') {
- d_cfg->icon_height = DEFAULT_ICON_HEIGHT;
- }
- else {
- d_cfg->icon_height = 0;
- }
- }
- else if (!strncasecmp(w, "IconHeight=", 11)) {
- if (action == '-') {
- return "Cannot combine '-' with IconHeight=n";
- }
- d_cfg->icon_height = atoi(&w[11]);
- }
- else if (!strcasecmp(w, "NameWidth")) {
- if (action != '-') {
- return "NameWidth with no value may only appear"
- " as "
- "'-NameWidth'";
- }
- d_cfg->name_width = DEFAULT_NAME_WIDTH;
- d_cfg->name_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "NameWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with NameWidth=n";
- }
- if (w[10] == '*') {
- d_cfg->name_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[10]);
-
- if (width && (width < 5)) {
- return "NameWidth value must be greater"
- " than 5";
- }
- d_cfg->name_width = width;
- d_cfg->name_adjust = K_NOADJUST;
- }
- }
- else if (!strcasecmp(w, "DescriptionWidth")) {
- if (action != '-') {
- return "DescriptionWidth with no value may only"
- " appear as "
- "'-DescriptionWidth'";
- }
- d_cfg->desc_width = DEFAULT_DESC_WIDTH;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "DescriptionWidth=", 17)) {
- if (action == '-') {
- return "Cannot combine '-' with "
- "DescriptionWidth=n";
- }
- if (w[17] == '*') {
- d_cfg->desc_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[17]);
-
- if (width && (width < 12)) {
- return "DescriptionWidth value must be "
- "greater than 12";
- }
- d_cfg->desc_width = width;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- }
- else if (!strncasecmp(w, "Type=", 5)) {
- d_cfg->ctype = apr_pstrdup(cmd->pool, &w[5]);
- }
- else if (!strncasecmp(w, "Charset=", 8)) {
- d_cfg->charset = apr_pstrdup(cmd->pool, &w[8]);
- }
- else {
- return "Invalid directory indexing option";
- }
- if (action == '\0') {
- opts |= option;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (action == '+') {
- opts_add |= option;
- opts_remove &= ~option;
- }
- else {
- opts_remove |= option;
- opts_add &= ~option;
- }
- }
- if ((opts & NO_OPTIONS) && (opts & ~NO_OPTIONS)) {
- return "Cannot combine other IndexOptions keywords with 'None'";
- }
- d_cfg->incremented_opts = opts_add;
- d_cfg->decremented_opts = opts_remove;
- d_cfg->opts = opts;
- return NULL;
-}
-
-
-static const char *
-cmd_set_default_order(cmd_parms *cmd, void *m,
- const char *direction, const char *key)
-{
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) m;
-
- if (!strcasecmp(direction, "Ascending")) {
- d_cfg->default_direction = D_ASCENDING;
- }
- else if (!strcasecmp(direction, "Descending")) {
- d_cfg->default_direction = D_DESCENDING;
- }
- else {
- return "First keyword must be 'Ascending' or 'Descending'";
- }
-
- if (!strcasecmp(key, "Name")) {
- d_cfg->default_keyid = K_NAME;
- }
- else if (!strcasecmp(key, "Date")) {
- d_cfg->default_keyid = K_LAST_MOD;
- }
- else if (!strcasecmp(key, "Size")) {
- d_cfg->default_keyid = K_SIZE;
- }
- else if (!strcasecmp(key, "Description")) {
- d_cfg->default_keyid = K_DESC;
- }
- else {
- return "Second keyword must be 'Name', 'Date', 'Size', or "
- "'Description'";
- }
-
- return NULL;
-}
-
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-/*
- * This routine puts the standard HTML header at the top of the index page.
- * We include the DOCTYPE because we may be using features therefrom (i.e.,
- * HEIGHT and WIDTH attributes on the icons if we're FancyIndexing).
- */
-static void emit_preamble(request_rec *r, int xhtml, const char *title)
-{
- glusterfs_dir_config_t *d;
-
- d = (glusterfs_dir_config_t *) ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- if (xhtml) {
- ap_rvputs(r, DOCTYPE_XHTML_1_0T,
- "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"
- " <head>\n <title>Index of ", title,
- "</title>\n", NULL);
- } else {
- ap_rvputs(r, DOCTYPE_HTML_3_2,
- "<html>\n <head>\n"
- " <title>Index of ", title,
- "</title>\n", NULL);
- }
-
- if (d->style_sheet != NULL) {
- ap_rvputs(r, " <link rel=\"stylesheet\" href=\"",
- d->style_sheet, "\" type=\"text/css\"",
- xhtml ? " />\n" : ">\n", NULL);
- }
- ap_rvputs(r, " </head>\n <body>\n", NULL);
-}
-
-
-static const char *cmd_add_alt(cmd_parms *cmd, void *d, const char *alt,
- const char *to)
-{
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- char *tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->alt_list, cmd->info, to,
- cmd->path, alt);
- return NULL;
-}
-
-static const char *cmd_add_icon(cmd_parms *cmd, void *d, const char *icon,
- const char *to)
-{
- char *iconbak = apr_pstrdup(cmd->pool, icon);
- char *alt = NULL, *cl = NULL, *tmp = NULL;
-
- if (icon[0] == '(') {
- cl = strchr(iconbak, ')');
-
- if (cl == NULL) {
- return "missing closing paren";
- }
- alt = ap_getword_nc(cmd->pool, &iconbak, ',');
- *cl = '\0'; /* Lose closing paren */
- cmd_add_alt(cmd, d, &alt[1], to);
- }
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->icon_list, cmd->info, to,
- cmd->path, iconbak);
- return NULL;
-}
-
-
-static void *
-mod_glfs_create_dir_config(apr_pool_t *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) apr_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- /* mod_dir options init */
- dir_config->index_names = NULL;
- dir_config->do_slash = SLASH_UNSET;
-
- /* autoindex options init */
- dir_config->icon_width = 0;
- dir_config->icon_height = 0;
- dir_config->name_width = DEFAULT_NAME_WIDTH;
- dir_config->name_adjust = K_UNSET;
- dir_config->desc_width = DEFAULT_DESC_WIDTH;
- dir_config->desc_adjust = K_UNSET;
- dir_config->icon_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->alt_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->desc_list = apr_array_make(p, 4,
- sizeof(mod_glfs_ai_desc_t));
- dir_config->ign_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->hdr_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->rdme_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->opts = 0;
- dir_config->incremented_opts = 0;
- dir_config->decremented_opts = 0;
- dir_config->default_keyid = '\0';
- dir_config->default_direction = '\0';
-
- return (void *) dir_config;
-}
-
-
-static void *
-mod_glfs_merge_dir_config(apr_pool_t *p, void *parent_conf,
- void *newloc_conf)
-{
- glusterfs_dir_config_t *new = NULL;
- glusterfs_dir_config_t *add = NULL;
- glusterfs_dir_config_t *base = NULL;
-
- new = (glusterfs_dir_config_t *)
- apr_pcalloc(p, sizeof(glusterfs_dir_config_t));
- add = newloc_conf;
- base = parent_conf;
-
- if (add->logfile)
- new->logfile = apr_pstrdup (p, add->logfile);
-
- if (add->loglevel)
- new->loglevel = apr_pstrdup (p, add->loglevel);
-
- if (add->specfile)
- new->specfile = apr_pstrdup (p, add->specfile);
-
- if (add->mount_dir)
- new->mount_dir = apr_pstrdup (p, add->mount_dir);
-
- new->xattr_file_size = add->xattr_file_size;
- new->cache_timeout = add->cache_timeout;
- new->buf = add->buf;
-
- /* mod_dir */
- new->index_names = add->index_names ?
- add->index_names : base->index_names;
- new->do_slash =
- (add->do_slash == SLASH_UNSET) ? base->do_slash : add->do_slash;
-
- /* auto index */
- new->default_icon = add->default_icon ? add->default_icon
- : base->default_icon;
- new->style_sheet = add->style_sheet ? add->style_sheet
- : base->style_sheet;
- new->icon_height = add->icon_height ?
- add->icon_height : base->icon_height;
- new->icon_width = add->icon_width ? add->icon_width : base->icon_width;
-
- new->ctype = add->ctype ? add->ctype : base->ctype;
- new->charset = add->charset ? add->charset : base->charset;
-
- new->alt_list = apr_array_append(p, add->alt_list, base->alt_list);
- new->ign_list = apr_array_append(p, add->ign_list, base->ign_list);
- new->hdr_list = apr_array_append(p, add->hdr_list, base->hdr_list);
- new->desc_list = apr_array_append(p, add->desc_list, base->desc_list);
- new->icon_list = apr_array_append(p, add->icon_list, base->icon_list);
- new->rdme_list = apr_array_append(p, add->rdme_list, base->rdme_list);
- if (add->opts & NO_OPTIONS) {
- /*
- * If the current directory says 'no options' then we also
- * clear any incremental mods from being inheritable further down.
- */
- new->opts = NO_OPTIONS;
- new->incremented_opts = 0;
- new->decremented_opts = 0;
- }
- else {
- /*
- * If there were any nonincremental options selected for
- * this directory, they dominate and we don't inherit *anything.*
- * Contrariwise, we *do* inherit if the only settings here are
- * incremental ones.
- */
- if (add->opts == 0) {
- new->incremented_opts = (base->incremented_opts
- | add->incremented_opts)
- & ~add->decremented_opts;
- new->decremented_opts = (base->decremented_opts
- | add->decremented_opts);
- /*
- * We may have incremental settings, so make sure we
- * don't inadvertently inherit an IndexOptions None
- * from above.
- */
- new->opts = (base->opts & ~NO_OPTIONS);
- }
- else {
- /*
- * There are local nonincremental settings, which clear
- * all inheritance from above. They *are* the new
- * base settings.
- */
- new->opts = add->opts;;
- }
- /*
- * We're guaranteed that there'll be no overlap between
- * the add-options and the remove-options.
- */
- new->opts |= new->incremented_opts;
- new->opts &= ~new->decremented_opts;
- }
- /*
- * Inherit the NameWidth settings if there aren't any specific to
- * the new location; otherwise we'll end up using the defaults set
- * in the config-rec creation routine.
- */
- if (add->name_adjust == K_UNSET) {
- new->name_width = base->name_width;
- new->name_adjust = base->name_adjust;
- }
- else {
- new->name_width = add->name_width;
- new->name_adjust = add->name_adjust;
- }
-
- /*
- * Likewise for DescriptionWidth.
- */
- if (add->desc_adjust == K_UNSET) {
- new->desc_width = base->desc_width;
- new->desc_adjust = base->desc_adjust;
- }
- else {
- new->desc_width = add->desc_width;
- new->desc_adjust = add->desc_adjust;
- }
-
- new->default_keyid = add->default_keyid ? add->default_keyid
- : base->default_keyid;
- new->default_direction = add->default_direction ? add->default_direction
- : base->default_direction;
-
- return (void *) new;
-}
-
-
-static void
-mod_glfs_child_init(apr_pool_t *p, server_rec *s)
-{
- int i = 0, num_sec = 0, ret = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = (core_server_config *) ap_get_module_config (s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
-
- if (dir_config) {
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.logfile = dir_config->logfile;
- ctx.loglevel = dir_config->loglevel;
- ctx.lookup_timeout = dir_config->cache_timeout;
- ctx.stat_timeout = dir_config->cache_timeout;
- ctx.specfile = dir_config->specfile;
-
- ret = glusterfs_mount (dir_config->mount_dir, &ctx);
- if (ret != 0) {
- ap_log_error(APLOG_MARK, APLOG_ERR,
- APR_EGENERAL, s,
- "mod_glfs_child_init: "
- "glusterfs_init failed, check "
- "glusterfs logfile %s for more "
- "details",
- dir_config->logfile);
- }
- }
- dir_config = NULL;
- }
-}
-
-
-static void
-mod_glfs_child_exit(server_rec *s, apr_pool_t *p)
-{
- int i = 0, num_sec = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = ap_get_module_config(s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static apr_filetype_e filetype_from_mode(mode_t mode)
-{
- apr_filetype_e type = APR_NOFILE;
-
- if (S_ISREG(mode))
- type = APR_REG;
- else if (S_ISDIR(mode))
- type = APR_DIR;
- else if (S_ISCHR(mode))
- type = APR_CHR;
- else if (S_ISBLK(mode))
- type = APR_BLK;
- else if (S_ISFIFO(mode))
- type = APR_PIPE;
- else if (S_ISLNK(mode))
- type = APR_LNK;
- else if (S_ISSOCK(mode))
- type = APR_SOCK;
- else
- type = APR_UNKFILE;
- return type;
-}
-
-
-static void fill_out_finfo(apr_finfo_t *finfo, struct stat *info,
- apr_int32_t wanted)
-{
- finfo->valid = APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK
- | APR_FINFO_OWNER | APR_FINFO_PROT;
- finfo->protection = apr_unix_mode2perms(info->st_mode);
- finfo->filetype = filetype_from_mode(info->st_mode);
- finfo->user = info->st_uid;
- finfo->group = info->st_gid;
- finfo->size = info->st_size;
- finfo->device = info->st_dev;
- finfo->nlink = info->st_nlink;
-
- /* Check for overflow if storing a 64-bit st_ino in a 32-bit
- * apr_ino_t for LFS builds: */
- if (sizeof(apr_ino_t) >= sizeof(info->st_ino)
- || (apr_ino_t)info->st_ino == info->st_ino) {
- finfo->inode = info->st_ino;
- } else {
- finfo->valid &= ~APR_FINFO_INODE;
- }
-
- apr_time_ansi_put(&finfo->atime, info->st_atime);
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- finfo->atime += info->st_atim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
- finfo->atime += info->st_atimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIME_N)
- finfo->ctime += info->st_atime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->mtime, info->st_mtime);
-#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
- finfo->mtime += info->st_mtim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
- finfo->mtime += info->st_mtimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIME_N)
- finfo->ctime += info->st_mtime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->ctime, info->st_ctime);
-#ifdef HAVE_STRUCT_STAT_ST_CTIM_TV_NSEC
- finfo->ctime += info->st_ctim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIMENSEC)
- finfo->ctime += info->st_ctimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIME_N)
- finfo->ctime += info->st_ctime_n / APR_TIME_C(1000);
-#endif
-
-#ifdef HAVE_STRUCT_STAT_ST_BLOCKS
-#ifdef DEV_BSIZE
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)DEV_BSIZE;
-#else
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)512;
-#endif
- finfo->valid |= APR_FINFO_CSIZE;
-#endif
-}
-
-
-static int
-mod_glfs_map_to_storage(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL, *tmp = NULL;
- int access_status = 0, ret = 0;
- char *path = NULL;
- struct stat st = {0, };
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- int num_sec = 0, i = 0;
-
- sconf = (core_server_config *) ap_get_module_config (r->server->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- tmp = ap_get_module_config (sec_ent[i], &glusterfs_module);
-
- if (tmp && !strncmp (tmp->mount_dir, r->uri,
- strlen (tmp->mount_dir))) {
- if (!dir_config ||
- strlen (tmp->mount_dir)
- > strlen (dir_config->mount_dir)) {
- dir_config = tmp;
- }
- }
-
- }
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (apr_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL), r->uri,
- strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = GLUSTERFS_HANDLER;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &st);
-
- if (ret == -1 || st.st_size > dir_config->xattr_file_size
- || S_ISDIR (st.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, 0,
- r, "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- r->finfo.pool = r->pool;
- r->finfo.fname = r->filename;
- fill_out_finfo (&r->finfo, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK |
- APR_FINFO_OWNER | APR_FINFO_PROT);
-
- /* allow core module to run directory_walk() and location_walk() */
- return DECLINED;
-}
-
-
-static int
-mod_glfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glfs_read_async (request_rec *r, apr_bucket_brigade *bb,
- glusterfs_file_t fd,
- apr_off_t offset, apr_off_t length)
-{
- glusterfs_async_local_t local = {0, };
- off_t end = 0;
- int nbytes = 0, complete = 0;
- conn_rec *c = r->connection;
- apr_bucket *e = NULL;
- apr_status_t status = APR_SUCCESS;
- glusterfs_iobuf_t *buf = NULL;
-
- if (length == 0) {
- return 0;
- }
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length) ||
- (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (!bb) {
- bb = apr_brigade_create (r->pool, c->bucket_alloc);
- }
- apr_brigade_writev (bb, NULL, NULL, buf->vector, buf->count);
-
- /*
- * make sure all the data is written out, since we call
- * glusterfs_free on buf once ap_pass_brigade returns
- */
- e = apr_bucket_flush_create (c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- status = ap_pass_brigade (r->output_filters, bb);
- if (status != APR_SUCCESS) {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- complete = 1;
- local.op_ret = -1;
- }
-
- glusterfs_free (buf);
-
- /*
- * bb has already been cleaned up by core_output_filter,
- * just being paranoid
- */
- apr_brigade_cleanup (bb);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? HTTP_INTERNAL_SERVER_ERROR : OK);
-}
-
-static int
-parse_byterange(char *range, apr_off_t clength,
- apr_off_t *start, apr_off_t *end)
-{
- char *dash = NULL, *errp = NULL;
- apr_off_t number;
-
- dash = strchr(range, '-');
- if (!dash) {
- return 0;
- }
-
- if ((dash == range)) {
- /* In the form "-5" */
- if (apr_strtoff(&number, dash+1, &errp, 10) || *errp) {
- return 0;
- }
- *start = clength - number;
- *end = clength - 1;
- }
- else {
- *dash++ = '\0';
- if (apr_strtoff(&number, range, &errp, 10) || *errp) {
- return 0;
- }
- *start = number;
- if (*dash) {
- if (apr_strtoff(&number, dash, &errp, 10) || *errp) {
- return 0;
- }
- *end = number;
- }
- else { /* "5-" */
- *end = clength - 1;
- }
- }
-
- if (*start < 0) {
- *start = 0;
- }
-
- if (*end >= clength) {
- *end = clength - 1;
- }
-
- if (*start > *end) {
- return -1;
- }
-
- return (*start > 0 || *end < clength);
-}
-
-
-static int use_range_x(request_rec *r)
-{
- const char *ua = NULL;
- return (apr_table_get(r->headers_in, "Request-Range")
- || ((ua = apr_table_get(r->headers_in, "User-Agent"))
- && ap_strstr_c(ua, "MSIE 3")));
-}
-
-
-static int ap_set_byterange(request_rec *r)
-{
- const char *range = NULL, *if_range = NULL, *match = NULL, *ct = NULL;
- int num_ranges = 0;
-
- if (r->assbackwards) {
- return 0;
- }
-
- /* Check for Range request-header (HTTP/1.1) or Request-Range for
- * backwards-compatibility with second-draft Luotonen/Franks
- * byte-ranges (e.g. Netscape Navigator 2-3).
- *
- * We support this form, with Request-Range, and (farther down) we
- * send multipart/x-byteranges instead of multipart/byteranges for
- * Request-Range based requests to work around a bug in Netscape
- * Navigator 2-3 and MSIE 3.
- */
-
- if (!(range = apr_table_get(r->headers_in, "Range"))) {
- range = apr_table_get(r->headers_in, "Request-Range");
- }
-
- if (!range || strncasecmp(range, "bytes=", 6) || r->status != HTTP_OK) {
- return 0;
- }
-
- /* is content already a single range? */
- if (apr_table_get(r->headers_out, "Content-Range")) {
- return 0;
- }
-
- /* is content already a multiple range? */
- if ((ct = apr_table_get(r->headers_out, "Content-Type"))
- && (!strncasecmp(ct, "multipart/byteranges", 20)
- || !strncasecmp(ct, "multipart/x-byteranges", 22))) {
- return 0;
- }
-
- /* Check the If-Range header for Etag or Date.
- * Note that this check will return false (as required) if either
- * of the two etags are weak.
- */
- if ((if_range = apr_table_get(r->headers_in, "If-Range"))) {
- if (if_range[0] == '"') {
- if (!(match = apr_table_get(r->headers_out, "Etag"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
- else if (!(match = apr_table_get(r->headers_out,
- "Last-Modified"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
-
- if (!ap_strchr_c(range, ',')) {
- /* a single range */
- num_ranges = 1;
- }
- else {
- /* a multiple range */
- num_ranges = 2;
- }
-
- r->status = HTTP_PARTIAL_CONTENT;
- r->range = range + 6;
-
- return num_ranges;
-}
-
-
-static void
-mod_glfs_handle_byte_ranges (request_rec *r, glusterfs_file_t fd,
- int num_ranges)
-{
- conn_rec *c = r->connection;
- char *ts = NULL, *boundary = NULL, *bound_head = NULL;
- const char *orig_ct = NULL;
- char *current = NULL, *end = NULL;
- apr_bucket_brigade *bsend = NULL;
- apr_bucket *e = NULL;
- apr_off_t range_start, range_end;
- apr_status_t rv = APR_SUCCESS;
- char found = 0;
- apr_bucket *e2 = NULL, *ec = NULL;
-
- orig_ct = ap_make_content_type (r, r->content_type);
-
- if (num_ranges > 1) {
- boundary = apr_psprintf(r->pool, "%" APR_UINT64_T_HEX_FMT "%lx",
- (apr_uint64_t)r->request_time,
- (long) getpid());
-
- ap_set_content_type(r, apr_pstrcat(r->pool, "multipart",
- use_range_x(r) ? "/x-" : "/",
- "byteranges; boundary=",
- boundary, NULL));
-
- if (strcasecmp(orig_ct, NO_CONTENT_TYPE)) {
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-type: ",
- orig_ct,
- CRLF "Content-range: bytes ",
- NULL);
- }
- else {
- /* if we have no type for the content, do our best */
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-range: bytes ",
- NULL);
- }
- }
-
- while ((current = ap_getword(r->pool, &r->range, ','))
- && (rv = parse_byterange(current, r->finfo.size, &range_start,
- &range_end))) {
- bsend = NULL;
- if (rv == -1) {
- continue;
- }
-
- found = 1;
-
- /* For single range requests, we must produce Content-Range
- * header. Otherwise, we need to produce the multipart
- * boundaries.
- */
- if (num_ranges == 1) {
- apr_table_setn(r->headers_out, "Content-Range",
- apr_psprintf(r->pool,
- "bytes " BYTERANGE_FMT,
- range_start, range_end,
- r->finfo.size));
- }
- else {
- /* this brigade holds what we will be sending */
- bsend = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_pool_create(bound_head,
- strlen(bound_head),
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
-
- ts = apr_psprintf(r->pool, BYTERANGE_FMT CRLF CRLF,
- range_start, range_end,
- r->finfo.size);
- e = apr_bucket_pool_create(ts, strlen(ts), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
- mod_glfs_read_async (r, bsend, fd, range_start,
- (range_end + 1 - range_start));
- }
-
- bsend = apr_brigade_create (r->pool, c->bucket_alloc);
-
- if (found == 0) {
- r->status = HTTP_OK;
- /* bsend is assumed to be empty if we get here. */
- e = ap_bucket_error_create(HTTP_RANGE_NOT_SATISFIABLE, NULL,
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- ap_pass_brigade (r->output_filters, bsend);
- return;
- }
-
- if (num_ranges > 1) {
- /* add the final boundary */
- end = apr_pstrcat(r->pool, CRLF "--", boundary, "--" CRLF,
- NULL);
-// ap_xlate_proto_to_ascii(end, strlen(end));
- e = apr_bucket_pool_create(end, strlen(end), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
-
- ap_pass_brigade (r->output_filters, bsend);
-}
-
-
-
-/****************************************************************
- *
- * Looking things up in config entries...
- */
-
-/* Structure used to hold entries when we're actually building an index */
-
-struct ent {
- char *name;
- char *icon;
- char *alt;
- char *desc;
- apr_off_t size;
- apr_time_t lm;
- struct ent *next;
- int ascending, ignore_case, version_sort;
- char key;
- int isdir;
-};
-
-static char *find_item(request_rec *r, apr_array_header_t *list, int path_only)
-{
- const char *content_type = NULL;
- const char *content_encoding = NULL;
- char *path = NULL;
- int i = 0;
- struct mod_glfs_ai_item *items = NULL;
- struct mod_glfs_ai_item *p = NULL;
-
- content_type = ap_field_noparam(r->pool, r->content_type);
- content_encoding = r->content_encoding;
- path = r->filename;
- items = (struct mod_glfs_ai_item *) list->elts;
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- /* Special cased for ^^DIRECTORY^^ and ^^BLANKICON^^ */
- if ((path[0] == '^') || (!ap_strcmp_match(path,
- p->apply_path))) {
- if (!*(p->apply_to)) {
- return p->data;
- }
- else if (p->type == BY_PATH || path[0] == '^') {
- if (!ap_strcmp_match(path, p->apply_to)) {
- return p->data;
- }
- }
- else if (!path_only) {
- if (!content_encoding) {
- if (p->type == BY_TYPE) {
- if (content_type
- && !ap_strcasecmp_match(content_type,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- else {
- if (p->type == BY_ENCODING) {
- if (!ap_strcasecmp_match(content_encoding,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- }
- }
- }
- return NULL;
-}
-
-#define find_icon(d,p,t) find_item(p,d->icon_list,t)
-#define find_alt(d,p,t) find_item(p,d->alt_list,t)
-#define find_header(d,p) find_item(p,d->hdr_list,0)
-#define find_readme(d,p) find_item(p,d->rdme_list,0)
-
-static char *find_default_item(char *bogus_name, apr_array_header_t *list)
-{
- request_rec r;
- /* Bleah. I tried to clean up find_item, and it lead to this bit
- * of ugliness. Note that the fields initialized are precisely
- * those that find_item looks at...
- */
- r.filename = bogus_name;
- r.content_type = r.content_encoding = NULL;
- return find_item(&r, list, 1);
-}
-
-#define find_default_icon(d,n) find_default_item(n, d->icon_list)
-#define find_default_alt(d,n) find_default_item(n, d->alt_list)
-
-/*
- * Look through the list of pattern/description pairs and return the first one
- * if any) that matches the filename in the request. If multiple patterns
- * match, only the first one is used; since the order in the array is the
- * same as the order in which directives were processed, earlier matching
- * directives will dominate.
- */
-
-#ifdef CASE_BLIND_FILESYSTEM
-#define MATCH_FLAGS APR_FNM_CASE_BLIND
-#else
-#define MATCH_FLAGS 0
-#endif
-
-static char *find_desc(glusterfs_dir_config_t *dcfg, const char *filename_full)
-{
- int i = 0;
- mod_glfs_ai_desc_t *list = NULL;
- const char *filename_only = NULL, *filename = NULL;
- mod_glfs_ai_desc_t *tuple = &list[i];
- int found = 0;
-
- list = (mod_glfs_ai_desc_t *) dcfg->desc_list->elts;
- /*
- * If the filename includes a path, extract just the name itself
- * for the simple matches.
- */
- if ((filename_only = ap_strrchr_c(filename_full, '/')) == NULL) {
- filename_only = filename_full;
- }
- else {
- filename_only++;
- }
- for (i = 0; i < dcfg->desc_list->nelts; ++i) {
- /*
- * Only use the full-path filename if the pattern contains '/'s.
- */
- filename = (tuple->full_path) ? filename_full : filename_only;
- /*
- * Make the comparison using the cheapest method; only do
- * wildcard checking if we must.
- */
- if (tuple->wildcards) {
- found = (apr_fnmatch(tuple->pattern, filename,
- MATCH_FLAGS) == 0);
- }
- else {
- found = (ap_strstr_c(filename, tuple->pattern) != NULL);
- }
- if (found) {
- return tuple->description;
- }
- }
- return NULL;
-}
-
-static int ignore_entry(glusterfs_dir_config_t *d, char *path)
-{
- apr_array_header_t *list = d->ign_list;
- struct mod_glfs_ai_item *items = (struct mod_glfs_ai_item *) list->elts;
- char *tt = NULL, *ap = NULL;
- int i = 0;
- struct mod_glfs_ai_item *p = &items[i];
-
- if ((tt = strrchr(path, '/')) == NULL) {
- tt = path;
- }
- else {
- tt++;
- }
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- if ((ap = strrchr(p->apply_to, '/')) == NULL) {
- ap = p->apply_to;
- }
- else {
- ap++;
- }
-
-#ifndef CASE_BLIND_FILESYSTEM
- if (!ap_strcmp_match(path, p->apply_path)
- && !ap_strcmp_match(tt, ap)) {
- return 1;
- }
-#else /* !CASE_BLIND_FILESYSTEM */
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (!ap_strcasecmp_match(path, p->apply_path)
- && !ap_strcasecmp_match(tt, ap)) {
- return 1;
- }
-#endif /* !CASE_BLIND_FILESYSTEM */
- }
- return 0;
-}
-
-/*****************************************************************
- *
- * Actually generating output
- */
-
-/*
- * Elements of the emitted document:
- * Preamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) header file.
- * Header file
- * Emitted if found (and able).
- * H1 tag line
- * Emitted if a header file is NOT emitted.
- * Directory stuff
- * Always emitted.
- * HR
- * Emitted if FANCY_INDEXING is set.
- * Readme file
- * Emitted if found (and able).
- * ServerSig
- * Emitted if ServerSignature is not Off AND a readme file
- * is NOT emitted.
- * Postamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) readme file.
- */
-
-
-/*
- * emit a plain text file
- */
-static void do_emit_plain(request_rec *r, apr_file_t *f)
-{
- char buf[AP_IOBUFSIZE + 1];
- int ch = 0;
- apr_size_t i = 0, c = 0, n = 0;
- apr_status_t rv = APR_SUCCESS;
-
- ap_rputs("<pre>\n", r);
- while (!apr_file_eof(f)) {
- do {
- n = sizeof(char) * AP_IOBUFSIZE;
- rv = apr_file_read(f, buf, &n);
- } while (APR_STATUS_IS_EINTR(rv));
- if (n == 0 || rv != APR_SUCCESS) {
- /* ###: better error here? */
- break;
- }
- buf[n] = '\0';
- c = 0;
- while (c < n) {
- for (i = c; i < n; i++) {
- if (buf[i] == '<' || buf[i] == '>'
- || buf[i] == '&') {
- break;
- }
- }
- ch = buf[i];
- buf[i] = '\0';
- ap_rputs(&buf[c], r);
- if (ch == '<') {
- ap_rputs("&lt;", r);
- }
- else if (ch == '>') {
- ap_rputs("&gt;", r);
- }
- else if (ch == '&') {
- ap_rputs("&amp;", r);
- }
- c = i + 1;
- }
- }
- ap_rputs("</pre>\n", r);
-}
-
-/*
- * Handle the preamble through the H1 tag line, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as HEADER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_head(request_rec *r, char *header_fname, int suppress_amble,
- int emit_xhtml, char *title)
-{
- apr_table_t *hdrs = r->headers_in;
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int emit_amble = 1;
- int emit_H1 = 1;
- const char *r_accept = NULL;
- const char *r_accept_enc = NULL;
-
- /*
- * If there's a header file, send a subrequest to look for it. If it's
- * found and html do the subrequest, otherwise handle it
- */
- r_accept = apr_table_get(hdrs, "Accept");
- r_accept_enc = apr_table_get(hdrs, "Accept-Encoding");
- apr_table_setn(hdrs, "Accept", "text/html, text/plain");
- apr_table_unset(hdrs, "Accept-Encoding");
-
-
- if ((header_fname != NULL) && r->args) {
- header_fname = apr_pstrcat(r->pool, header_fname, "?", r->args,
- NULL);
- }
-
- if ((header_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(header_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && (rr->finfo.filetype == APR_REG)) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f = NULL;
-
- /* Hope everything will work... */
- emit_amble = 0;
- emit_H1 = 0;
-
- if (! suppress_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- /* This is a hack, but I can't find any better
- * way to do this. The problem is that we have
- * already created the sub-request,
- * but we just inserted the OLD_WRITE filter,
- * and the sub-request needs to pass its data
- * through the OLD_WRITE filter, or things go
- * horribly wrong (missing data, data in
- * the wrong order, etc). To fix it, if you
- * create a sub-request and then insert the
- * OLD_WRITE filter before you run the request,
- * you need to make sure that the sub-request
- * data goes through the OLD_WRITE filter. Just
- * steal this code. The long-term solution is
- * to remove the ap_r* functions.
- */
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
- /*
- * If there's a problem running the subrequest,
- * display the preamble if we didn't do it
- * before -- the header file didn't get displayed.
- */
- if (ap_run_sub_req(rr) != OK) {
- /* It didn't work */
- emit_amble = suppress_amble;
- emit_H1 = 1;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, prefix it with the
- * preamble regardless; since we'll be sending
- * a <pre> block around the file's contents,
- * any HTML header it had won't end up
- * where it belongs.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- emit_preamble(r, emit_xhtml, title);
- emit_amble = 0;
- do_emit_plain(r, f);
- apr_file_close(f);
- emit_H1 = 0;
- }
- }
- }
- }
-
- if (r_accept) {
- apr_table_setn(hdrs, "Accept", r_accept);
- }
- else {
- apr_table_unset(hdrs, "Accept");
- }
-
- if (r_accept_enc) {
- apr_table_setn(hdrs, "Accept-Encoding", r_accept_enc);
- }
-
- if (emit_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- if (emit_H1) {
- ap_rvputs(r, "<h1>Index of ", title, "</h1>\n", NULL);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-/*
- * Handle the Readme file through the postamble, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as FOOTER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_tail(request_rec *r, char *readme_fname, int suppress_amble)
-{
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int suppress_post = 0, suppress_sig = 0;
-
- /*
- * If there's a readme file, send a subrequest to look for it. If it's
- * found and a text file, handle it -- otherwise fall through and
- * pretend there's nothing there.
- */
- if ((readme_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(readme_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && rr->finfo.filetype == APR_REG) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f;
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
-
- if (ap_run_sub_req(rr) == OK) {
- /* worked... */
- suppress_sig = 1;
- suppress_post = suppress_amble;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, suppress the signature.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- do_emit_plain(r, f);
- apr_file_close(f);
- suppress_sig = 1;
- }
- }
- }
- }
-
- if (!suppress_sig) {
- ap_rputs(ap_psignature("", r), r);
- }
- if (!suppress_post) {
- ap_rputs("</body></html>\n", r);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-static char *find_title(request_rec *r)
-{
- char titlebuf[MAX_STRING_LEN], *find = "<title>";
- apr_file_t *thefile = NULL;
- int x = 0, y = 0, p = 0;
- apr_size_t n;
-
- if (r->status != HTTP_OK) {
- return NULL;
- }
- if ((r->content_type != NULL)
- && (!strcasecmp(ap_field_noparam(r->pool, r->content_type),
- "text/html")
- || !strcmp(r->content_type, INCLUDES_MAGIC_TYPE))
- && !r->content_encoding) {
- if (apr_file_open(&thefile, r->filename, APR_READ,
- APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
- return NULL;
- }
- n = sizeof(char) * (MAX_STRING_LEN - 1);
- apr_file_read(thefile, titlebuf, &n);
- if (n <= 0) {
- apr_file_close(thefile);
- return NULL;
- }
- titlebuf[n] = '\0';
- for (x = 0, p = 0; titlebuf[x]; x++) {
- if (apr_tolower(titlebuf[x]) == find[p]) {
- if (!find[++p]) {
- if ((p = ap_ind(&titlebuf[++x], '<'))
- != -1) {
- titlebuf[x + p] = '\0';
- }
- /* Scan for line breaks for Tanmoy's
- secretary
- */
- for (y = x; titlebuf[y]; y++) {
- if ((titlebuf[y] == CR)
- || (titlebuf[y] == LF)) {
- if (y == x) {
- x++;
- }
- else {
- titlebuf[y] = ' ';
- }
- }
- }
- apr_file_close(thefile);
- return apr_pstrdup(r->pool,
- &titlebuf[x]);
- }
- }
- else {
- p = 0;
- }
- }
- apr_file_close(thefile);
- }
- return NULL;
-}
-
-static struct ent *make_parent_entry(apr_int32_t autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction)
-{
- struct ent *p = NULL;
- char *testpath = NULL;
- /*
- * p->name is now the true parent URI.
- * testpath is a crafted lie, so that the syntax '/some/..'
- * (or simply '..')be used to describe 'up' from '/some/'
- * when processeing IndexIgnore, and Icon|Alt|Desc configs.
- */
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- /* The output has always been to the parent. Don't make ourself
- * our own parent (worthless cyclical reference).
- */
- if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
- return (NULL);
- }
- ap_getparents(p->name);
- if (!*p->name) {
- return (NULL);
- }
-
- /* IndexIgnore has always compared "/thispath/.." */
- testpath = ap_make_full_path(r->pool, r->filename, "..");
- if (ignore_entry(d, testpath)) {
- return (NULL);
- }
-
- p->size = -1;
- p->lm = -1;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = autoindex_opts & VERSION_SORT;
- if (autoindex_opts & FANCY_INDEXING) {
- if (!(p->icon = find_default_icon(d, testpath))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_default_alt(d, testpath))) {
- if (!(p->alt = find_default_alt(d, "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- p->desc = find_desc(d, testpath);
- }
- return p;
-}
-
-static struct ent *make_autoindex_entry(const apr_finfo_t *dirent,
- int autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction,
- const char *pattern)
-{
- request_rec *rr = NULL;
- struct ent *p = NULL;
- int show_forbidden = 0;
-
- /* Dot is ignored, Parent is handled by make_parent_entry() */
- if ((dirent->name[0] == '.') && (!dirent->name[1]
- || ((dirent->name[1] == '.')
- && !dirent->name[2])))
- return (NULL);
-
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (pattern && (apr_fnmatch(pattern, dirent->name,
- APR_FNM_NOESCAPE | APR_FNM_PERIOD
-#ifdef CASE_BLIND_FILESYSTEM
- | APR_FNM_CASE_BLIND
-#endif
- )
- != APR_SUCCESS)) {
- return (NULL);
- }
-
- if (ignore_entry(d, ap_make_full_path(r->pool,
- r->filename, dirent->name))) {
- return (NULL);
- }
-
- if (!(rr = ap_sub_req_lookup_dirent(dirent, r, AP_SUBREQ_NO_ARGS,
- NULL))) {
- return (NULL);
- }
-
- if((autoindex_opts & SHOW_FORBIDDEN)
- && (rr->status == HTTP_UNAUTHORIZED
- || rr->status == HTTP_FORBIDDEN)) {
- show_forbidden = 1;
- }
-
- if ((rr->finfo.filetype != APR_DIR && rr->finfo.filetype != APR_REG)
- || !(rr->status == OK || ap_is_HTTP_SUCCESS(rr->status)
- || ap_is_HTTP_REDIRECT(rr->status)
- || show_forbidden == 1)) {
- ap_destroy_sub_req(rr);
- return (NULL);
- }
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- if (dirent->filetype == APR_DIR) {
- p->name = apr_pstrcat(r->pool, dirent->name, "/", NULL);
- }
- else {
- p->name = apr_pstrdup(r->pool, dirent->name);
- }
- p->size = -1;
- p->icon = NULL;
- p->alt = NULL;
- p->desc = NULL;
- p->lm = -1;
- p->isdir = 0;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = !!(autoindex_opts & VERSION_SORT);
- p->ignore_case = !!(autoindex_opts & IGNORE_CASE);
-
- if (autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING)) {
- p->lm = rr->finfo.mtime;
- if (dirent->filetype == APR_DIR) {
- if (autoindex_opts & FOLDERS_FIRST) {
- p->isdir = 1;
- }
- rr->filename = ap_make_dirstr_parent (rr->pool,
- rr->filename);
-
- /* omit the trailing slash (1.3 compat) */
- rr->filename[strlen(rr->filename) - 1] = '\0';
-
- if (!(p->icon = find_icon(d, rr, 1))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_alt(d, rr, 1))) {
- if (!(p->alt = find_default_alt(d,
- "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- }
- else {
- p->icon = find_icon(d, rr, 0);
- p->alt = find_alt(d, rr, 0);
- p->size = rr->finfo.size;
- }
-
- p->desc = find_desc(d, rr->filename);
-
- if ((!p->desc) && (autoindex_opts & SCAN_HTML_TITLES)) {
- p->desc = apr_pstrdup(r->pool, find_title(rr));
- }
- }
- ap_destroy_sub_req(rr);
- /*
- * We don't need to take any special action for the file size key.
- * If we did, it would go here.
- */
- if (keyid == K_LAST_MOD) {
- if (p->lm < 0) {
- p->lm = 0;
- }
- }
- return (p);
-}
-
-static char *terminate_description(glusterfs_dir_config_t *d, char *desc,
- apr_int32_t autoindex_opts, int desc_width)
-{
- int maxsize = desc_width;
- register int x = 0;
-
- /*
- * If there's no DescriptionWidth in effect, default to the old
- * behaviour of adjusting the description size depending upon
- * what else is being displayed. Otherwise, stick with the
- * setting.
- */
- if (d->desc_adjust == K_UNSET) {
- if (autoindex_opts & SUPPRESS_ICON) {
- maxsize += 6;
- }
- if (autoindex_opts & SUPPRESS_LAST_MOD) {
- maxsize += 19;
- }
- if (autoindex_opts & SUPPRESS_SIZE) {
- maxsize += 7;
- }
- }
- for (x = 0; desc[x] && ((maxsize > 0) || (desc[x] == '<')); x++) {
- if (desc[x] == '<') {
- while (desc[x] != '>') {
- if (!desc[x]) {
- maxsize = 0;
- break;
- }
- ++x;
- }
- }
- else if (desc[x] == '&') {
- /* entities like &auml; count as one character */
- --maxsize;
- for ( ; desc[x] != ';'; ++x) {
- if (desc[x] == '\0') {
- maxsize = 0;
- break;
- }
- }
- }
- else {
- --maxsize;
- }
- }
- if (!maxsize && desc[x] != '\0') {
- desc[x - 1] = '>'; /* Grump. */
- desc[x] = '\0'; /* Double Grump! */
- }
- return desc;
-}
-
-/*
- * Emit the anchor for the specified field. If a field is the key for the
- * current request, the link changes its meaning to reverse the order when
- * selected again. Non-active fields always start in ascending order.
- */
-static void emit_link(request_rec *r, const char *anchor, char column,
- char curkey, char curdirection,
- const char *colargs, int nosort)
-{
- char qvalue[9];
-
- if (!nosort) {
-
- qvalue[0] = '?';
- qvalue[1] = 'C';
- qvalue[2] = '=';
- qvalue[3] = column;
- qvalue[4] = ';';
- qvalue[5] = 'O';
- qvalue[6] = '=';
- /* reverse? */
- qvalue[7] = ((curkey == column) && (curdirection == D_ASCENDING))
- ? D_DESCENDING : D_ASCENDING;
- qvalue[8] = '\0';
- ap_rvputs(r, "<a href=\"", qvalue, colargs ? colargs : "",
- "\">", anchor, "</a>", NULL);
- }
- else {
- ap_rputs(anchor, r);
- }
-}
-
-static void output_directories(struct ent **ar, int n,
- glusterfs_dir_config_t *d, request_rec *r,
- apr_int32_t autoindex_opts, char keyid,
- char direction, const char *colargs)
-{
- int x = 0;
- apr_size_t rv = APR_SUCCESS;
- char *name = NULL, *tp = NULL;
- int static_columns = 0;
- apr_pool_t *scratch = NULL;
- int name_width = 0, desc_width = 0, cols = 0;
- char *name_scratch = NULL, *pad_scratch = NULL, *breakrow = "";
- char *anchor = NULL, *t = NULL, *t2 = NULL;
- int nwidth = 0;
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts = {0, };
- char buf[5];
-
- name = r->uri;
- static_columns = !!(autoindex_opts & SUPPRESS_COLSORT);
- apr_pool_create(&scratch, r->pool);
- if (name[0] == '\0') {
- name = "/";
- }
-
- name_width = d->name_width;
- desc_width = d->desc_width;
-
- if ((autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING))
- == FANCY_INDEXING) {
- if (d->name_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- int t = 0;
- t = strlen(ar[x]->name);
- if (t > name_width) {
- name_width = t;
- }
- }
- }
-
- if (d->desc_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- if (ar[x]->desc != NULL) {
- int t = 0;
- t = strlen(ar[x]->desc);
- if (t > desc_width) {
- desc_width = t;
- }
- }
- }
- }
- }
- name_scratch = apr_palloc(r->pool, name_width + 1);
- pad_scratch = apr_palloc(r->pool, name_width + 1);
- memset(pad_scratch, ' ', name_width);
- pad_scratch[name_width] = '\0';
-
- if (autoindex_opts & TABLE_INDEXING) {
- cols = 1;
- ap_rputs("<table><tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<th>", r);
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"[ICO]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></th>", r);
- }
- else {
- ap_rputs("&nbsp;</th>", r);
- }
-
- ++cols;
- }
- ap_rputs("<th>", r);
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- breakrow = apr_psprintf(r->pool,
- "<tr><th colspan=\"%d\">"
- "<hr%s></th></tr>\n", cols,
- (autoindex_opts & EMIT_XHTML)
- ? " /" : "");
- }
- ap_rvputs(r, "</th></tr>", breakrow, NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- ap_rputs("<pre>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"Icon \"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("> ", r);
- }
- else {
- ap_rputs(" ", r);
- }
- }
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- ap_rputs(pad_scratch + 4, r);
- /*
- * Emit the guaranteed-at-least-one-space-between-columns byte.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputc('\n', r);
- }
- }
- else {
- ap_rputs("<ul>", r);
- }
-
- for (x = 0; x < n; x++) {
- apr_pool_clear(scratch);
-
- t = ar[x]->name;
- anchor = ap_escape_html(scratch, ap_os_escape_path(scratch, t,
- 0));
-
- if (!x && t[0] == '/') {
- t2 = "Parent Directory";
- }
- else {
- t2 = t;
- }
-
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rputs("<tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<td valign=\"top\">", r);
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ?
- ar[x]->alt : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs("&nbsp;", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a></td>", r);
- }
- else {
- ap_rputs("</td>", r);
- }
- }
- if (d->name_adjust == K_ADJUST) {
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2), "</a>",
- NULL);
- }
- else {
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- }
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts;
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "</td><td align=\"right\""
- ">%d-%b-%Y %H:%M ",
- &ts);
- ap_rputs(time_str, r);
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rvputs(r, "</td><td align=\"right\">",
- apr_strfsize(ar[x]->size, buf), NULL);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- if (d->desc_adjust == K_ADJUST) {
- ap_rvputs(r, "</td><td>",
- ar[x]->desc, NULL);
- }
- else {
- ap_rvputs(r, "</td><td>",
- terminate_description(d, ar[x]->desc,
- autoindex_opts,
- desc_width), NULL);
- }
- }
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- ap_rputs("</td></tr>\n", r);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ? ar[x]->alt
- : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs(" ", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a> ", r);
- }
- else {
- ap_rputc(' ', r);
- }
- }
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- /*
- * The blank before the storm.. er, before the next
- * field.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "%d-%b-%Y %H:%M ", &ts);
- ap_rputs(time_str, r);
- }
- else {
- /* Length="22-Feb-1998 23:42 "
- * (see 4 lines above)
- */
- ap_rputs(" ", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs(apr_strfsize(ar[x]->size, buf), r);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- ap_rputs(terminate_description(d,
- ar[x]->desc,
- autoindex_opts,
- desc_width), r);
- }
- }
- ap_rputc('\n', r);
- }
- else {
- ap_rvputs(r, "<li><a href=\"", anchor, "\"> ",
- ap_escape_html(scratch, t2),
- "</a></li>\n", NULL);
- }
- }
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rvputs(r, breakrow, "</table>\n", NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></pre>\n", r);
- }
- else {
- ap_rputs("</pre>\n", r);
- }
- }
- else {
- ap_rputs("</ul>\n", r);
- }
-}
-
-/*
- * Compare two file entries according to the sort criteria. The return
- * is essentially a signum function value.
- */
-
-static int dsortf(struct ent **e1, struct ent **e2)
-{
- struct ent *c1 = NULL, *c2 = NULL;
- int result = 0;
-
- /*
- * First, see if either of the entries is for the parent directory.
- * If so, that *always* sorts lower than anything else.
- */
- if ((*e1)->name[0] == '/') {
- return -1;
- }
- if ((*e2)->name[0] == '/') {
- return 1;
- }
- /*
- * Now see if one's a directory and one isn't, if we're set
- * isdir for FOLDERS_FIRST.
- */
- if ((*e1)->isdir != (*e2)->isdir) {
- return (*e1)->isdir ? -1 : 1;
- }
- /*
- * All of our comparisons will be of the c1 entry against the c2 one,
- * so assign them appropriately to take care of the ordering.
- */
- if ((*e1)->ascending) {
- c1 = *e1;
- c2 = *e2;
- }
- else {
- c1 = *e2;
- c2 = *e1;
- }
-
- switch (c1->key) {
- case K_LAST_MOD:
- if (c1->lm > c2->lm) {
- return 1;
- }
- else if (c1->lm < c2->lm) {
- return -1;
- }
- break;
- case K_SIZE:
- if (c1->size > c2->size) {
- return 1;
- }
- else if (c1->size < c2->size) {
- return -1;
- }
- break;
- case K_DESC:
- if (c1->version_sort) {
- result = apr_strnatcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- else {
- result = strcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- if (result) {
- return result;
- }
- break;
- }
-
- /* names may identical when treated case-insensitively,
- * so always fall back on strcmp() flavors to put entries
- * in deterministic order. This means that 'ABC' and 'abc'
- * will always appear in the same order, rather than
- * variably between 'ABC abc' and 'abc ABC' order.
- */
-
- if (c1->version_sort) {
- if (c1->ignore_case) {
- result = apr_strnatcasecmp (c1->name, c2->name);
- }
- if (!result) {
- result = apr_strnatcmp(c1->name, c2->name);
- }
- }
-
- /* The names may be identical in respects other other than
- * filename case when strnatcmp is used above, so fall back
- * to strcmp on conflicts so that fn1.01.zzz and fn1.1.zzz
- * are also sorted in a deterministic order.
- */
-
- if (!result && c1->ignore_case) {
- result = strcasecmp (c1->name, c2->name);
- }
-
- if (!result) {
- result = strcmp (c1->name, c2->name);
- }
-
- return result;
-}
-
-
-static int
-mod_glfs_index_directory (request_rec *r,
- glusterfs_dir_config_t *autoindex_conf)
-{
- char *title_name = NULL, *title_endp = NULL;
- char *pstring = NULL, *colargs = NULL;
- char *path = NULL, *fname = NULL, *charset = NULL;
- char *fullpath = NULL, *name = NULL, *ctype = NULL;
- apr_finfo_t dirent;
- glusterfs_file_t fd = NULL;
- apr_status_t status = APR_SUCCESS;
- int num_ent = 0, x;
- struct ent *head = NULL, *p = NULL;
- struct ent **ar = NULL;
- const char *qstring = NULL;
- apr_int32_t autoindex_opts = autoindex_conf->opts;
- char keyid, direction;
- apr_size_t dirpathlen;
- glusterfs_dir_config_t *dir_config = NULL;
- int ret = -1;
- struct dirent *entry = NULL;
- struct stat st = {0, };
-
- name = r->filename;
- title_name = ap_escape_html(r->pool, r->uri);
- ctype = "text/html";
- dir_config = mod_glfs_dconfig (r);
- if (dir_config == NULL) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- path = r->uri;
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- if (autoindex_conf->ctype) {
- ctype = autoindex_conf->ctype;
- }
- if (autoindex_conf->charset) {
- charset = autoindex_conf->charset;
- }
- else {
-#if APR_HAS_UNICODE_FS
- charset = "UTF-8";
-#else
- charset = "ISO-8859-1";
-#endif
- }
- if (*charset) {
- ap_set_content_type(r, apr_pstrcat(r->pool, ctype, ";charset=",
- charset, NULL));
- }
- else {
- ap_set_content_type(r, ctype);
- }
-
- if (autoindex_opts & TRACK_MODIFIED) {
- ap_update_mtime(r, r->finfo.mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- }
- if (r->header_only) {
- glusterfs_close (fd);
- return 0;
- }
-
- /*
- * If there is no specific ordering defined for this directory,
- * default to ascending by filename.
- */
- keyid = autoindex_conf->default_keyid
- ? autoindex_conf->default_keyid : K_NAME;
- direction = autoindex_conf->default_direction
- ? autoindex_conf->default_direction : D_ASCENDING;
-
- /*
- * Figure out what sort of indexing (if any) we're supposed to use.
- *
- * If no QUERY_STRING was specified or client query strings have been
- * explicitly disabled.
- * If we are ignoring the client, suppress column sorting as well.
- */
- if (autoindex_opts & IGNORE_CLIENT) {
- qstring = NULL;
- autoindex_opts |= SUPPRESS_COLSORT;
- colargs = "";
- }
- else {
- char fval[5], vval[5], *ppre = "", *epattern = "";
- fval[0] = '\0'; vval[0] = '\0';
- qstring = r->args;
-
- while (qstring && *qstring) {
-
- /* C= First Sort key Column (N, M, S, D) */
- if ( qstring[0] == 'C' && qstring[1] == '='
- && qstring[2] && strchr(K_VALID, qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- keyid = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* O= Sort order (A, D) */
- else if ( qstring[0] == 'O' && qstring[1] == '='
- && ( (qstring[2] == D_ASCENDING)
- || (qstring[2] == D_DESCENDING))
- && ( qstring[3] == '&'
- || qstring[3] == ';'
- || !qstring[3])) {
- direction = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* F= Output Format (0 plain, 1 fancy (pre), 2 table) */
- else if ( qstring[0] == 'F' && qstring[1] == '='
- && qstring[2] && strchr("012", qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~(FANCY_INDEXING
- | TABLE_INDEXING);
- }
- else if (qstring[2] == '1') {
- autoindex_opts = (autoindex_opts
- | FANCY_INDEXING)
- & ~TABLE_INDEXING;
- }
- else if (qstring[2] == '2') {
- autoindex_opts |= FANCY_INDEXING
- | TABLE_INDEXING;
- }
- strcpy(fval, ";F= ");
- fval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* V= Version sort (0, 1) */
- else if ( qstring[0] == 'V' && qstring[1] == '='
- && (qstring[2] == '0' || qstring[2] == '1')
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~VERSION_SORT;
- }
- else if (qstring[2] == '1') {
- autoindex_opts |= VERSION_SORT;
- }
- strcpy(vval, ";V= ");
- vval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* P= wildcard pattern (*.foo) */
- else if (qstring[0] == 'P' && qstring[1] == '=') {
- const char *eos = qstring += 2; /* for efficiency */
-
- while (*eos && *eos != '&' && *eos != ';') {
- ++eos;
- }
-
- if (eos == qstring) {
- pstring = NULL;
- }
- else {
- pstring = apr_pstrndup(r->pool, qstring,
- eos - qstring);
- if (ap_unescape_url(pstring) != OK) {
- /* ignore the pattern, if it's bad. */
- pstring = NULL;
- }
- else {
- ppre = ";P=";
- /* be correct */
- epattern = ap_escape_uri(r->pool,
- pstring);
- }
- }
-
- if (*eos && *++eos) {
- qstring = eos;
- }
- else {
- qstring = NULL;
- }
- }
-
- /* Syntax error? Ignore the remainder! */
- else {
- qstring = NULL;
- }
- }
- colargs = apr_pstrcat(r->pool, fval, vval, ppre, epattern,
- NULL);
- }
-
- /* Spew HTML preamble */
- title_endp = title_name + strlen(title_name) - 1;
-
- while (title_endp > title_name && *title_endp == '/') {
- *title_endp-- = '\0';
- }
-
- emit_head(r, find_header(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE,
- autoindex_opts & EMIT_XHTML, title_name);
-
- /*
- * Since we don't know how many dir. entries there are, put them into a
- * linked list and then arrayificate them so qsort can use them.
- */
- head = NULL;
- p = make_parent_entry(autoindex_opts, autoindex_conf, r, keyid,
- direction);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- fullpath = apr_palloc(r->pool, APR_PATH_MAX);
- dirpathlen = strlen(name);
- memcpy(fullpath, name, dirpathlen);
-
- do {
- entry = glusterfs_readdir (fd);
- if (entry == NULL) {
- break;
- }
-
- fname = apr_pstrcat (r->pool, path, entry->d_name, NULL);
-
- ret = glusterfs_stat (fname, &st);
- if (ret != 0) {
- break;
- }
-
- dirent.fname = fname;
- dirent.name = apr_pstrdup (r->pool, entry->d_name);
- fill_out_finfo (&dirent, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT
- | APR_FINFO_NLINK | APR_FINFO_OWNER
- | APR_FINFO_PROT);
-
- p = make_autoindex_entry(&dirent, autoindex_opts,
- autoindex_conf, r,
- keyid, direction, pstring);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- } while (1);
-
- if (num_ent > 0) {
- ar = (struct ent **) apr_palloc(r->pool,
- num_ent * sizeof(struct ent *));
- p = head;
- x = 0;
- while (p) {
- ar[x++] = p;
- p = p->next;
- }
-
- qsort((void *) ar, num_ent, sizeof(struct ent *),
- (int (*)(const void *, const void *)) dsortf);
- }
- output_directories(ar, num_ent, autoindex_conf, r, autoindex_opts,
- keyid, direction, colargs);
- glusterfs_close (fd);
-
- emit_tail(r, find_readme(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE);
-
- return 0;
-}
-
-
-static int
-handle_autoindex(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int allow_opts;
-
- allow_opts = ap_allow_options(r);
-
- r->allowed |= (AP_METHOD_BIT << M_GET);
- if (r->method_number != M_GET) {
- return DECLINED;
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- /* OK, nothing easy. Trot out the heavy artillery... */
-
- if (allow_opts & OPT_INDEXES) {
- int errstatus;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- /* KLUDGE --- make the sub_req lookups happen in the right
- * directory. Fixing this in the sub_req_lookup functions
- * themselves is difficult, and would probably break
- * virtual includes...
- */
-
- if (r->filename[strlen(r->filename) - 1] != '/') {
- r->filename = apr_pstrcat(r->pool, r->filename, "/",
- NULL);
- }
- return mod_glfs_index_directory(r, dir_config);
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Directory index forbidden by "
- "Options directive: %s", r->filename);
- return HTTP_FORBIDDEN;
- }
-}
-
-
-static int
-mod_glfs_handler (request_rec *r)
-{
- conn_rec *c = r->connection;
- apr_bucket_brigade *bb;
- apr_bucket *e;
- core_dir_config *d;
- int errstatus;
- glusterfs_file_t fd = NULL;
- apr_status_t status;
- glusterfs_dir_config_t *dir_config = NULL;
- char *path = NULL;
- int num_ranges = 0;
- apr_size_t size = 0;
- apr_off_t range_start = 0, range_end = 0;
- char *current = NULL;
- apr_status_t rv = 0;
- core_request_config *req_cfg = NULL;
-
- /* XXX if/when somebody writes a content-md5 filter we either need to
- * remove this support or coordinate when to use the filter vs.
- * when to use this code
- * The current choice of when to compute the md5 here matches the 1.3
- * support fairly closely (unlike 1.3, we don't handle computing md5
- * when the charset is translated).
- */
-
- int bld_content_md5;
- if (!r->handler || (r->handler
- && strcmp (r->handler, GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0') {
- return DECLINED;
- }
-
- if (r->finfo.filetype == APR_DIR) {
- return handle_autoindex (r);
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- ap_allow_standard_methods(r, MERGE_ALLOW, M_GET, -1);
-
- /* We understood the (non-GET) method, but it might not be legal for
- this particular resource. Check to see if the 'deliver_script'
- flag is set. If so, then we go ahead and deliver the file since
- it isn't really content (only GET normally returns content).
-
- Note: based on logic further above, the only possible non-GET
- method at this point is POST. In the future, we should enable
- script delivery for all methods. */
- if (r->method_number != M_GET) {
- req_cfg = ap_get_module_config(r->request_config, &core_module);
- if (!req_cfg->deliver_script) {
- /* The flag hasn't been set for this request. Punt. */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "This resource does not accept the %s "
- "method.",
- r->method);
- return HTTP_METHOD_NOT_ALLOWED;
- }
- }
-
- d = (core_dir_config *)ap_get_module_config(r->per_dir_config,
- &core_module);
- bld_content_md5 = (d->content_md5 & 1)
- && r->output_filters->frec->ftype != AP_FTYPE_RESOURCE;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- if (r->finfo.filetype == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s", r->filename);
- return HTTP_NOT_FOUND;
- }
-
- if ((r->used_path_info != AP_REQ_ACCEPT_PATH_INFO) &&
- r->path_info && *r->path_info)
- {
- /* default to reject */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s",
- apr_pstrcat(r->pool, r->filename, r->path_info,
- NULL));
- return HTTP_NOT_FOUND;
- }
-
- ap_update_mtime (r, r->finfo.mtime);
- ap_set_last_modified (r);
- ap_set_etag (r);
- apr_table_setn (r->headers_out, "Accept-Ranges", "bytes");
-
- num_ranges = ap_set_byterange(r);
- if (num_ranges == 0) {
- size = r->finfo.size;
- } else {
- char *tmp = apr_pstrdup (r->pool, r->range);
- while ((current = ap_getword(r->pool, (const char **)&tmp, ','))
- && (rv = parse_byterange(current, r->finfo.size,
- &range_start, &range_end))) {
- size += (range_end - range_start);
- }
- }
-
- ap_set_content_length (r, size);
-
- if ((errstatus = ap_meets_conditions(r)) != OK) {
- r->status = errstatus;
- }
-
- /*
- * file is small enough to have already got the content in
- * glusterfs_lookup
- */
- if (r->finfo.size <= dir_config->xattr_file_size && dir_config->buf) {
- if (bld_content_md5) {
- apr_table_setn (r->headers_out, "Content-MD5",
- (const char *)ap_md5_binary(r->pool,
- dir_config->buf
- , r->finfo.size));
- }
-
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, 0, r,
- "fetching data from glusterfs through xattr "
- "interface\n");
-
- bb = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_heap_create (dir_config->buf, r->finfo.size,
- free, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bb, e);
-
- dir_config->buf = NULL;
-
- /* let the byterange_filter handle multipart requests */
- status = ap_pass_brigade(r->output_filters, bb);
- if (status == APR_SUCCESS
- || r->status != HTTP_OK
- || c->aborted) {
- return OK;
- }
- else {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- return HTTP_INTERNAL_SERVER_ERROR;
- }
- }
-
- /* do standard open/read/close to fetch content */
- path = r->uri;
-
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- /*
- * byterange_filter cannot handle range requests, since we are not
- * sending the whole data in a single brigade
- */
-
-
- if (num_ranges == 0) {
- mod_glfs_read_async (r, NULL, fd, 0, -1);
- } else {
- mod_glfs_handle_byte_ranges (r, fd, num_ranges);
- }
-
- glusterfs_close (fd);
-}
-
-
-#if 0
-static apr_status_t
-mod_glfs_output_filter (ap_filter_t *f,
- apr_bucket_brigade *b)
-{
- size_t size = 0;
- apr_bucket_t *e = NULL;
- size = atol (apr_table_get (r->notes, MOD_GLFS_SIZE));
-
- for (e = APR_BRIGADE_FIRST(b);
- e != APR_BRIGADE_SENTINEL(b);
- e = APR_BUCKET_NEXT(e))
- {
- /* FIXME: can there be more than one heap buckets? */
- if (e->type == &apr_bucket_type_heap) {
- break;
- }
- }
-
- if (e != APR_BRIGADE_SENTINEL(b)) {
- e->length = size;
- }
-
- return ap_pass_brigade (f->next, b);
-}
-#endif
-
-static int
-mod_glfs_fixup_dir(request_rec *r)
-{
- glusterfs_dir_config_t *d = NULL;
- char *dummy_ptr[1];
- char **names_ptr = NULL, *name_ptr = NULL;
- int num_names;
- int error_notfound = 0;
- char *ifile = NULL;
- request_rec *rr = NULL;
-
- /* only handle requests against directories */
- if (r->finfo.filetype != APR_DIR) {
- return DECLINED;
- }
-
- if (!r->handler || strcmp (r->handler, GLUSTERFS_HANDLER)) {
- return DECLINED;
- }
-
- /* Never tolerate path_info on dir requests */
- if (r->path_info && *r->path_info) {
- return DECLINED;
- }
-
- d = (glusterfs_dir_config_t *)ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- /* Redirect requests that are not '/' terminated */
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] != '/')
- {
- if (!d->do_slash) {
- return DECLINED;
- }
-
- /* Only redirect non-get requests if we have no note to warn
- * that this browser cannot handle redirs on non-GET requests
- * (such as Microsoft's WebFolders).
- */
- if ((r->method_number != M_GET)
- && apr_table_get(r->subprocess_env, "redirect-carefully")) {
- return DECLINED;
- }
-
- if (r->args != NULL) {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", "?", r->args, NULL);
- }
- else {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", NULL);
- }
-
- apr_table_setn(r->headers_out, "Location",
- ap_construct_url(r->pool, ifile, r));
- return HTTP_MOVED_PERMANENTLY;
- }
-
- if (d->index_names) {
- names_ptr = (char **)d->index_names->elts;
- num_names = d->index_names->nelts;
- }
- else {
- dummy_ptr[0] = AP_DEFAULT_INDEX;
- names_ptr = dummy_ptr;
- num_names = 1;
- }
-
- for (; num_names; ++names_ptr, --num_names) {
- /* XXX: Is this name_ptr considered escaped yet, or not??? */
- name_ptr = *names_ptr;
-
- /* Once upon a time args were handled _after_ the successful
- * redirect. But that redirect might then _refuse_ the
- * given r->args, creating a nasty tangle. It seems safer to
- * consider the r->args while we determine if name_ptr is our
- * viable index, and therefore set them up correctly on redirect.
- */
- if (r->args != NULL) {
- name_ptr = apr_pstrcat(r->pool, name_ptr, "?", r->args,
- NULL);
- }
-
- rr = ap_sub_req_lookup_uri(name_ptr, r, NULL);
-
- /* The sub request lookup is very liberal, and the core
- * map_to_storage handler will almost always result in HTTP_OK
- * as /foo/index.html may be /foo with PATH_INFO="/index.html",
- * or even / with PATH_INFO="/foo/index.html". To get around
- * this we insist that the the index be a regular filetype.
- *
- * Another reason is that the core handler also makes the
- * assumption that if r->finfo is still NULL by the time it
- * gets called, the file does not exist.
- */
- if (rr->status == HTTP_OK
- && ( (rr->handler && !strcmp(rr->handler, "proxy-server"))
- || rr->finfo.filetype == APR_REG)) {
- ap_internal_fast_redirect(rr, r);
- return OK;
- }
-
- /* If the request returned a redirect, propagate it to the
- * client
- */
-
- if (ap_is_HTTP_REDIRECT(rr->status)
- || (rr->status == HTTP_NOT_ACCEPTABLE && num_names == 1)
- || (rr->status == HTTP_UNAUTHORIZED && num_names == 1)) {
-
- apr_pool_join(r->pool, rr->pool);
- error_notfound = rr->status;
- r->notes = apr_table_overlay(r->pool, r->notes,
- rr->notes);
- r->headers_out = apr_table_overlay(r->pool,
- r->headers_out,
- rr->headers_out);
- r->err_headers_out = apr_table_overlay(r->pool,
- r->err_headers_out,
- rr->err_headers_out);
- return error_notfound;
- }
-
- /* If the request returned something other than 404 (or 200),
- * it means the module encountered some sort of problem. To be
- * secure, we should return the error, rather than allow
- * autoindex to create a (possibly unsafe) directory index.
- *
- * So we store the error, and if none of the listed files
- * exist, we return the last error response we got, instead
- * of a directory listing.
- */
- if (rr->status && rr->status != HTTP_NOT_FOUND
- && rr->status != HTTP_OK) {
- error_notfound = rr->status;
- }
-
- ap_destroy_sub_req(rr);
- }
-
- if (error_notfound) {
- return error_notfound;
- }
-
- /* nothing for us to do, pass on through */
- return DECLINED;
-}
-
-
-static void
-mod_glfs_register_hooks(apr_pool_t *p)
-{
- ap_hook_child_init (mod_glfs_child_init, NULL, NULL, APR_HOOK_MIDDLE);
- ap_hook_handler (mod_glfs_handler, NULL, NULL, APR_HOOK_REALLY_FIRST);
- ap_hook_map_to_storage (mod_glfs_map_to_storage, NULL, NULL,
- APR_HOOK_REALLY_FIRST);
- ap_hook_fixups(mod_glfs_fixup_dir,NULL,NULL,APR_HOOK_LAST);
-
-/* mod_glfs_output_filter_handle =
- ap_register_output_filter ("MODGLFS", mod_glfs_output_filter,
- NULL, AP_FTYPE_PROTOCOL); */
-}
-
-static const char *
-cmd_add_index (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *d = dummy;
-
- if (!d->index_names) {
- d->index_names = apr_array_make(cmd->pool, 2, sizeof(char *));
- }
- *(const char **)apr_array_push(d->index_names) = arg;
- return NULL;
-}
-
-static const char *
-cmd_configure_slash (cmd_parms *cmd, void *d_, int arg)
-{
- glusterfs_dir_config_t *d = d_;
-
- d->do_slash = arg ? SLASH_ON : SLASH_OFF;
- return NULL;
-}
-
-#define DIR_CMD_PERMS OR_INDEXES
-
-static const
-command_rec mod_glfs_cmds[] =
-{
- AP_INIT_TAKE1(
- "GlusterfsLogfile",
- cmd_add_logfile,
- NULL,
- ACCESS_CONF, /*FIXME: allow overriding in .htaccess files */
- "Glusterfs logfile"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsLoglevel",
- cmd_set_loglevel,
- NULL,
- ACCESS_CONF,
- "Glusterfs loglevel:anyone of none, critical, error, warning, "
- "debug"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsCacheTimeout",
- cmd_set_cache_timeout,
- NULL,
- ACCESS_CONF,
- "Timeout value in seconds for lookup and stat cache of "
- "libglusterfsclient"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsVolumeSpecfile",
- cmd_add_volume_specfile,
- NULL,
- ACCESS_CONF,
- "Glusterfs Volume specfication file specifying filesystem "
- "under this directory"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsXattrFileSize",
- cmd_add_xattr_file_size,
- NULL,
- ACCESS_CONF,
- "Maximum size of the file that can be fetched through "
- "extended attribute interface of libglusterfsclient"
- ),
-
- /* mod_dir cmds */
- AP_INIT_ITERATE("DirectoryIndex", cmd_add_index,
- NULL, DIR_CMD_PERMS,
- "a list of file names"),
-
- AP_INIT_FLAG("DirectorySlash", cmd_configure_slash,
- NULL, DIR_CMD_PERMS,
- "On or Off"),
-
- /* autoindex cmds */
- AP_INIT_ITERATE2("AddIcon", cmd_add_icon,
- BY_PATH, DIR_CMD_PERMS,
- "an icon URL followed by one or more filenames"),
-
- AP_INIT_ITERATE2("AddIconByType", cmd_add_icon,
- BY_TYPE, DIR_CMD_PERMS,
- "an icon URL followed by one or more MIME types"),
-
- AP_INIT_ITERATE2("AddIconByEncoding", cmd_add_icon,
- BY_ENCODING, DIR_CMD_PERMS,
- "an icon URL followed by one or more content encodings"),
-
- AP_INIT_ITERATE2("AddAlt", cmd_add_alt, BY_PATH,
- DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "filenames"),
-
- AP_INIT_ITERATE2("AddAltByType", cmd_add_alt,
- BY_TYPE, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "MIME types"),
-
- AP_INIT_ITERATE2("AddAltByEncoding", cmd_add_alt,
- BY_ENCODING, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "content encodings"),
-
- AP_INIT_TAKE_ARGV("IndexOptions", cmd_add_opts,
- NULL, DIR_CMD_PERMS,
- "one or more index options [+|-][]"),
-
- AP_INIT_TAKE2("IndexOrderDefault", cmd_set_default_order,
- NULL, DIR_CMD_PERMS,
- "{Ascending,Descending} {Name,Size,Description,Date}"),
-
- AP_INIT_ITERATE("IndexIgnore", cmd_add_ignore,
- NULL, DIR_CMD_PERMS,
- "one or more file extensions"),
-
- AP_INIT_ITERATE2("AddDescription", cmd_add_desc,
- BY_PATH, DIR_CMD_PERMS,
- "Descriptive text followed by one or more filenames"),
-
- AP_INIT_TAKE1("HeaderName", cmd_add_header,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_TAKE1("ReadmeName", cmd_add_readme,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_RAW_ARGS("FancyIndexing", ap_set_deprecated,
- NULL, OR_ALL,
- "The FancyIndexing directive is no longer supported. "
- "Use IndexOptions FancyIndexing."),
-
- AP_INIT_TAKE1("DefaultIcon", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t,
- default_icon),
- DIR_CMD_PERMS, "an icon URL"),
-
- AP_INIT_TAKE1("IndexStyleSheet", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t, style_sheet),
- DIR_CMD_PERMS, "URL to style sheet"),
-
- {NULL}
-};
-
-module AP_MODULE_DECLARE_DATA glusterfs_module =
-{
- STANDARD20_MODULE_STUFF,
- mod_glfs_create_dir_config,
- mod_glfs_merge_dir_config,
- NULL, //mod_glfs_create_server_config,
- NULL, //mod_glfs_merge_server_config,
- mod_glfs_cmds,
- mod_glfs_register_hooks,
-};
diff --git a/mod_glusterfs/apache/Makefile.am b/mod_glusterfs/apache/Makefile.am
deleted file mode 100644
index bda039310..000000000
--- a/mod_glusterfs/apache/Makefile.am
+++ /dev/null
@@ -1,10 +0,0 @@
-SUBDIRS = $(MOD_GLUSTERFS_HTTPD_VERSION)
-
-EXTRA_DIST = 1.3/Makefile.am 1.3/Makefile.in \
- 1.3/src/Makefile.am 1.3/src/Makefile.in \
- 1.3/src/mod_glusterfs.c \
- 1.3/src/README.txt \
- 2.2/Makefile.am 2.2/Makefile.in \
- 2.2/src/Makefile.am 2.2/src/Makefile.in \
- 2.2/src/mod_glusterfs.c
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am b/mod_glusterfs/lighttpd/1.4/Makefile.am
deleted file mode 100644
index eda329111..000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff b/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
deleted file mode 100644
index 375696b5d..000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.4/README.txt b/mod_glusterfs/lighttpd/1.4/README.txt
deleted file mode 100644
index 786a146e4..000000000
--- a/mod_glusterfs/lighttpd/1.4/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.4, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.4 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.4 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.4/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.4/
-# cd /home/glusterfs/lighttpd-1.4
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
deleted file mode 100644
index 295c9704c..000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
+++ /dev/null
@@ -1,1820 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "http_chunk.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/*
- enables debug code for testing if all nodes in the stat-cache as accessable
-*/
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/*
- Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE
- results in a network_backend_write of the read data
-*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_iobuf_t *buf;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-} glusterfs_async_local_t;
-
-
-typedef struct {
- glusterfs_file_t fd;
- void *buf;
- buffer *glusterfs_path;
- /* off_t response_content_length; */
- int prefix;
-}mod_glusterfs_ctx_t;
-
-/* plugin config for all request/connections */
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
- char mounted;
-} plugin_config;
-
-static int (*network_backend_write)(struct server *srv, connection *con, int fd,
- chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
-
- plugin_config conf;
-} plugin_data;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_iobuf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the
- * monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-int
-mod_glusterfs_readv_async_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-static int
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- glusterfs_file_t fd = glusterfs_chunk->file.name;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if ((int)length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- unsigned long check = 0;
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = buffer_init ();
-
- check += buf->vector[i].iov_len;
-
- nw_write_buf->used = buf->vector[i].iov_len + 1;
- nw_write_buf->size = buf->vector[i].iov_len;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- offset += local.op_ret;
- chunkqueue_append_buffer_weak(cq, nw_write_buf);
- }
-
- network_backend_write (srv, con, con->fd, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (gf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start =(char *)gf_cq;
- return local.read_bytes;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? HANDLER_FINISHED : HANDLER_GO_ON);
-}
-
-int mod_glusterfs_network_backend_write(struct server *srv, connection *con,
- int fd, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- network_backend_write (srv, con, fd, cq);
-
- prev->next = c;
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- mod_glusterfs_chunkqueue *gf_cq = NULL;
-
- gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- network_backend_write (srv, con, fd, gf_cq->cq);
-
- if ((size_t)chunkqueue_written (gf_cq->cq)
- != gf_cq->length) {
- cq->first = first;
- return chunks_written;
- }
- for (tmp = gf_cq->cq->first ; tmp;
- tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- mod_glusterfs_read_async (srv, con, c);
- if (c->file.mmap.start) {
- /* pending chunkqueue from
- mod_glusterfs_read_async to be written to
- network */
- cq->first = first;
- return chunks_written;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- network_backend_write (srv, con, fd, cq);
-
- cq->first = first;
-
- return chunks_written;
-}
-
-int chunkqueue_append_glusterfs_file (connection *con, glusterfs_file_t fd,
- off_t offset, size_t len, size_t buf_size)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (con->write_queue);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- buffer_free (c->mem);
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->size = buf_size;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- p = calloc(1, sizeof(*p));
- network_backend_write = NULL;
-
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY,
- T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and
- remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET,
- T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1,
- srv->config_context->used
- * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage);*/
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->document_root = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv,
- ((data_config *)srv->config_context->data[i])->value,
- cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con,
- plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- /* skip the first, the global context */
- /*
- glusterfs related config can only occur inside
- $HTTP["url"] == "<glusterfs-prefix>"
- */
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.document_root = NULL;
-
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con,
- plugin_data *p) {
- int multipart = 0;
- int error;
- off_t start, end;
- const char *s, *minus;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- SEGFAULT();
- }
-
- start = 0;
- end = sce->st.st_size - 1;
-
- con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers,
- "Content-Type"))) {
- content_type = ds->value;
- }
-
- for (s = con->request.http_range, error = 0;
- !error && *s && NULL != (minus = strchr(s, '-')); ) {
- char *err;
- off_t la, le;
-
- if (s == minus) {
- /* -<stop> */
-
- le = strtoll(s, &err, 10);
-
- if (le == 0) {
- /* RFC 2616 - 14.35.1 */
-
- con->http_status = 416;
- error = 1;
- } else if (*err == '\0') {
- /* end */
- s = err;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else {
- error = 1;
- }
-
- } else if (*(minus+1) == '\0' || *(minus+1) == ',') {
- /* <start>- */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- /* ok */
-
- if (*(err + 1) == '\0') {
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = la;
-
- } else if (*(err + 1) == ',') {
- multipart = 1;
- s = err + 2;
-
- end = sce->st.st_size - 1;
- start = la;
- } else {
- error = 1;
- }
- } else {
- /* error */
- error = 1;
- }
- } else {
- /* <start>-<stop> */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- le = strtoll(minus+1, &err, 10);
-
- /* RFC 2616 - 14.35.1 */
- if (la > le) {
- error = 1;
- }
-
- if (*err == '\0') {
- /* ok, end*/
- s = err;
-
- end = le;
- start = la;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = le;
- start = la;
- } else {
- /* error */
-
- error = 1;
- }
- } else {
- /* error */
-
- error = 1;
- }
- }
-
- if (!error) {
- if (start < 0) start = 0;
-
- /* RFC 2616 - 14.35.1 */
- if (end > sce->st.st_size - 1) end = sce->st.st_size - 1;
-
- if (start > sce->st.st_size - 1) {
- error = 1;
-
- con->http_status = 416;
- }
- }
-
- if (!error) {
- if (multipart) {
- /* write boundary-header */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: "
- "bytes ");
- buffer_append_off_t(b, start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
-
- }
-
- if ((size_t)sce->st.st_size >= size) {
- chunkqueue_append_glusterfs_file(con, ctx->fd,
- start,
- end - start,
- size);
- } else {
- if (!start) {
- buffer *mem = buffer_init ();
- mem->ptr = ctx->buf;
- mem->used = mem->size = sce->st.st_size;
- http_chunk_append_buffer (srv, con, mem);
- ctx->buf = NULL;
- } else {
- chunkqueue_append_mem (con->write_queue,
- ((char *)ctx->buf)
- + start,
- end - start + 1);
- }
- }
-
- con->response.content_length += end - start + 1;
- }
- }
-
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- /* something went wrong */
- if (error) return -1;
-
- if (multipart) {
- /* add boundary end */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(p->range_buf));
- } else {
- /* add Content-Range-header */
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"),
- CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
- size_t size = 0;
- int ret = 0;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
- if (!p->conf.prefix || p->conf.prefix->used == 0) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (!p->conf.mounted) {
- glusterfs_init_params_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- ret = glusterfs_mount (p->conf.prefix->ptr, &ctx);
- if (ret != 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "glusterfs initialization failed, "
- "please check your configuration. "
- "Glusterfs logfile ",
- p->conf.logfile,
- "might contain details");
- return HANDLER_FINISHED;
- }
- p->conf.mounted = 1;
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
-/* FIXME: what if multiple files are requested from a single connection? */
-/* TODO: check whether this works fine for HTTP protocol 1.1 */
-
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path,
- p->conf.prefix);
- buffer_append_string (plugin_ctx->glusterfs_path,
- p->conf.document_root->ptr);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path,
- con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path,
- plugin_ctx->glusterfs_path);
-
- if (glusterfs_stat_cache_get_entry (srv, con,
- plugin_ctx->glusterfs_path,
- con->physical.path, plugin_ctx->buf,
- size, &sce) == HANDLER_ERROR) {
- if (errno == ENOENT)
- con->http_status = 404;
- else
- con->http_status = 403;
-
- free (plugin_ctx->buf);
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
- plugin_ctx->buf = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (!(S_ISREG (sce->st.st_mode) && (size_t)sce->st.st_size < size)) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
- }
-
- return HANDLER_GO_ON;
-}
-
-static int http_chunk_append_len(server *srv, connection *con, size_t len) {
- size_t i, olen = len, j;
- buffer *b;
-
- b = srv->tmp_chunk_len;
-
- if (len == 0) {
- buffer_copy_string(b, "0");
- } else {
- for (i = 0; i < 8 && len; i++) {
- len >>= 4;
- }
-
- /* i is the number of hex digits we have */
- buffer_prepare_copy(b, i + 1);
-
- for (j = i-1, len = olen; j+1 > 0; j--) {
- b->ptr[j] = (len & 0xf) + (((len & 0xf) <= 9) ?
- '0' : 'a' - 10);
- len >>= 4;
- }
- b->used = i;
- b->ptr[b->used++] = '\0';
- }
-
- buffer_append_string(b, "\r\n");
- chunkqueue_append_buffer(con->write_queue, b);
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_file_chunk(server *srv, connection *con,
- glusterfs_file_t fd, off_t offset,
- off_t len, size_t buf_size) {
- chunkqueue *cq;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len);
- }
-
- chunkqueue_append_glusterfs_file (con, fd, offset, len, buf_size);
-
- if ((con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED)
- && (len > 0)) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_mem(server *srv, connection *con,
- char * mem, size_t len,
- size_t buf_size)
-{
- chunkqueue *cq = NULL;
- buffer *buf = NULL;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (len == 0) {
- free (mem);
- if (con->response.transfer_encoding
- & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "0\r\n\r\n", 5 + 1);
- } else {
- chunkqueue_append_mem(cq, "", 1);
- }
- return 0;
- }
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len - 1);
- }
-
- buf = buffer_init ();
-
- buf->used = len + 1;
- buf->size = buf_size;
- buf->ptr = (char *)mem;
- chunkqueue_append_buffer_weak (cq, buf);
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->used)
- return HANDLER_GO_ON;
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr,
- strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- con->http_status = 403;
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
- ctx->fd = glusterfs_open (ctx->glusterfs_path->ptr, O_RDONLY,
- 0);
-
- if (((long)ctx->fd) == 0) {
- con->http_status = 403;
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- access denied due symlink "
- "restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, "Content-Type")) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also "
- * announce that this header field might change in "
- * the seconds few requests. This should fix the
- * aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_STR_LEN("application/"
- "octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Accept-Ranges"),
- CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0
- && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers,
- "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con,
- CONST_STR_LEN("ETag"),
- CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con,
- mtime)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (con->request.http_range && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers,
- "If-Range"))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value,
- con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match
- * the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->file_finished = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
- /*TODO check whether 1 should be subtracted */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size <= (size_t)sce->st.st_size) {
- http_chunk_append_glusterfs_file_chunk (srv, con, ctx->fd, 0,
- sce->st.st_size, size);
- } else {
- http_chunk_append_glusterfs_mem (srv, con, ctx->buf,
- sce->st.st_size, size);
- }
-
- con->http_status = 200;
- con->file_finished = 1;
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
-}
-
-#if 0
-URIHANDLER_FUNC(mod_glusterfs_request_done)
-{
- mod_glusterfs_iobuf_t *cur = first, *prev;
- while (cur) {
- prev = cur;
- glusterfs_free (cur->buf);
- cur = cur->next;
- free (prev);
- }
- first = NULL
- }
-#endif
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_subrequest_start = mod_glusterfs_subrequest;
- // p->handle_request_done = mod_glusterfs_request_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
-
-
-/* mod_glusterfs_stat_cache */
-static stat_cache_entry * stat_cache_entry_init(void) {
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-#ifdef HAVE_FAM_H
-static fam_dir_entry * fam_dir_entry_init(void) {
- fam_dir_entry *fam_dir = NULL;
-
- fam_dir = calloc(1, sizeof(*fam_dir));
- /* ERR_ABORT (fam_dir); */
-
- fam_dir->name = buffer_init();
-
- return fam_dir;
-}
-
-static void fam_dir_entry_free(void *data) {
- fam_dir_entry *fam_dir = data;
-
- if (!fam_dir) return;
-
- FAMCancelMonitor(fam_dir->fc, fam_dir->req);
-
- buffer_free(fam_dir->name);
- free(fam_dir->req);
-
- free(fam_dir);
-}
-#endif
-
-#ifdef HAVE_XATTR
-static int stat_cache_attr_get(buffer *buf, char *name) {
- int attrlen;
- int ret;
-
- attrlen = 1024;
- buffer_prepare_copy(buf, attrlen);
- attrlen--;
- if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) {
- buf->used = attrlen + 1;
- buf->ptr[attrlen] = '\0';
- }
- return ret;
-}
-#endif
-
-/* the famous DJB hash function for strings */
-static uint32_t hashme(buffer *str) {
- uint32_t hash = 5381;
- const char *s;
- for (s = str->ptr; *s; s++) {
- hash = ((hash << 5) + hash) + *s;
- }
-
- hash &= ~(1 << 31); /* strip the highest bit */
-
- return hash;
-}
-
-
-#ifdef HAVE_LSTAT
-static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
- if (lstat(dname->ptr, lst) == 0) {
- return S_ISLNK(lst->st_mode) ? 0 : 1;
- }
- else {
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "lstat failed for:",
- dname, strerror(errno));
- };
- return -1;
-}
-#endif
-
-/***
- *
- *
- *
- * returns:
- * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
- * - HANDLER_ERROR on stat() failed -> see errno for problem
- */
-
-handler_t glusterfs_stat_cache_get_entry(server *srv,
- connection *con,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
-#ifdef HAVE_FAM_H
- fam_dir_entry *fam_dir = NULL;
- int dir_ndx = -1;
- splay_tree *dir_node = NULL;
-#endif
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- struct stat st;
- size_t k;
-#ifdef DEBUG_STAT_CACHE
- size_t i;
-#endif
- int file_ndx;
- splay_tree *file_node = NULL;
-
- *ret_sce = NULL;
- memset (&st, 0, sizeof (st));
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- file_ndx = hashme(sc->hash_key);
- sc->files = splaytree_splay(sc->files, file_ndx);
-
-#ifdef DEBUG_STAT_CACHE
- for (i = 0; i < ctrl.used; i++) {
- if (ctrl.ptr[i] == file_ndx) break;
- }
-#endif
-
- if (sc->files && (sc->files->key == file_ndx)) {
-#ifdef DEBUG_STAT_CACHE
- /* it was in the cache */
- assert(i < ctrl.used);
-#endif
-
- /* we have seen this file already and
- * don't stat() it again in the same second */
-
- file_node = sc->files;
-
- sce = file_node->data;
-
- /* check if the name is the same, we might have a collision */
-
- if (buffer_is_equal(name, sce->name)) {
- if (srv->srvconf.stat_cache_engine
- == STAT_CACHE_ENGINE_SIMPLE) {
- if (sce->stat_ts == srv->cur_ts && !buf) {
- *ret_sce = sce;
- return HANDLER_GO_ON;
- }
- }
- } else {
- /* oops, a collision,
- *
- * file_node is used by the FAM check below to see if
- * we know this file and if we can save a stat().
- *
- * BUT, the sce is not reset here as the entry into
- * the cache is ok, we it is just not pointing to
- * our requested file.
- */
-
- file_node = NULL;
- }
- } else {
-#ifdef DEBUG_STAT_CACHE
- if (i != ctrl.used) {
- fprintf(stderr, "%s.%d: %08x was already inserted "
- "but not found in cache, %s\n",
- __FILE__, __LINE__, file_ndx, name->ptr);
- }
- assert(i == ctrl.used);
-#endif
- }
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is
- * better
- *
- * */
- if (-1 == glusterfs_get (glusterfs_path->ptr, buf, size, &st)) {
- return HANDLER_ERROR;
- }
-
- if (NULL == sce) {
- int osize = 0;
-
- if (sc->files) {
- osize = sc->files->size;
- }
-
- sce = stat_cache_entry_init();
- buffer_copy_string_buffer(sce->name, name);
-
- sc->files = splaytree_insert(sc->files, file_ndx, sce);
-#ifdef DEBUG_STAT_CACHE
- if (ctrl.size == 0) {
- ctrl.size = 16;
- ctrl.used = 0;
- ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- } else if (ctrl.size == ctrl.used) {
- ctrl.size += 16;
- ctrl.ptr = realloc(ctrl.ptr,
- ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- }
-
- ctrl.ptr[ctrl.used++] = file_ndx;
-
- assert(sc->files);
- assert(sc->files->data == sce);
- assert(osize + 1 == splaytree_size(sc->files));
-#endif
- }
-
- sce->st = st;
- sce->stat_ts = srv->cur_ts;
-
- /* catch the obvious symlinks
- *
- * this is not a secure check as we still have a race-condition between
- * the stat() and the open. We can only solve this by
- * 1. open() the file
- * 2. fstat() the fd
- *
- * and keeping the file open for the rest of the time. But this can
- * only be done at network level.
- *
- * per default it is not a symlink
- * */
-#ifdef HAVE_LSTAT
- sce->is_symlink = 0;
-
- /* we want to only check for symlinks if we should block symlinks.
- */
- if (!con->conf.follow_symlink) {
- if (stat_cache_lstat(srv, name, &lst) == 0) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "found symlink", name);
-#endif
- sce->is_symlink = 1;
- }
-
- /*
- * we assume "/" can not be symlink, so
- * skip the symlink stuff if our path is /
- **/
- else if ((name->used > 2)) {
- buffer *dname;
- char *s_cur;
-
- dname = buffer_init();
- buffer_copy_string_buffer(dname, name);
-
- while ((s_cur = strrchr(dname->ptr,'/'))) {
- *s_cur = '\0';
- dname->used = s_cur - dname->ptr + 1;
- if (dname->ptr == s_cur) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "s", "reached /");
-#endif
- break;
- }
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "checking if", dname, "is a "
- "symlink");
-#endif
- if (stat_cache_lstat(srv, dname, &lst) == 0) {
- sce->is_symlink = 1;
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "sb",
- "found symlink", dname);
-#endif
- break;
- };
- };
- buffer_free(dname);
- };
- };
-#endif
-
- if (S_ISREG(st.st_mode)) {
- /* determine mimetype */
- buffer_reset(sce->content_type);
-
- for (k = 0; k < con->conf.mimetypes->used; k++) {
- data_string *ds = NULL;
- buffer *type = NULL;
-
- ds = (data_string *)con->conf.mimetypes->data[k];
- type = ds->key;
- if (type->used == 0) continue;
-
- /* check if the right side is the same */
- if (type->used > name->used) continue;
-
- if (0 == strncasecmp(name->ptr + name->used - type->used,
- type->ptr, type->used - 1)) {
- buffer_copy_string_buffer(sce->content_type,
- ds->value);
- break;
- }
- }
- etag_create(sce->etag, &(sce->st), con->etag_flags);
-#ifdef HAVE_XATTR
- if (con->conf.use_xattr && buffer_is_empty(sce->content_type)) {
- stat_cache_attr_get(sce->content_type, name->ptr);
- }
-#endif
- } else if (S_ISDIR(st.st_mode)) {
- etag_create(sce->etag, &(sce->st), con->etag_flags);
- }
-
-#ifdef HAVE_FAM_H
- if (sc->fam &&
- (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM)) {
- /* is this directory already registered ? */
- if (!dir_node) {
- fam_dir = fam_dir_entry_init();
- fam_dir->fc = sc->fam;
-
- buffer_copy_string_buffer(fam_dir->name, sc->dir_name);
-
- fam_dir->version = 1;
-
- fam_dir->req = calloc(1, sizeof(FAMRequest));
- /* ERR_ABORT (fam_dir->req); */
-
- if (0 != FAMMonitorDirectory(sc->fam, fam_dir->name->ptr,
- fam_dir->req, fam_dir)) {
-
- log_error_write(srv, __FILE__, __LINE__, "sbsbs",
- "monitoring dir failed:",
- fam_dir->name,
- "file:", name,
- FamErrlist[FAMErrno]);
-
- fam_dir_entry_free(fam_dir);
- } else {
- int osize = 0;
-
- if (sc->dirs) {
- osize = sc->dirs->size;
- }
-
- sc->dirs = splaytree_insert(sc->dirs, dir_ndx,
- fam_dir);
- assert(sc->dirs);
- assert(sc->dirs->data == fam_dir);
- assert(osize == (sc->dirs->size - 1));
- }
- } else {
- fam_dir = dir_node->data;
- }
-
- /* bind the fam_fc to the stat() cache entry */
-
- if (fam_dir) {
- sce->dir_version = fam_dir->version;
- sce->dir_ndx = dir_ndx;
- }
- }
-#endif
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
-}
-
-/**
- * remove stat() from cache which havn't been stat()ed for
- * more than 10 seconds
- *
- *
- * walk though the stat-cache, collect the ids which are too old
- * and remove them in a second loop
- */
-
-static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys,
- size_t *ndx) {
- stat_cache_entry *sce;
-
- if (!t) return 0;
-
- stat_cache_tag_old_entries(srv, t->left, keys, ndx);
- stat_cache_tag_old_entries(srv, t->right, keys, ndx);
-
- sce = t->data;
-
- if (srv->cur_ts - sce->stat_ts > 2) {
- keys[(*ndx)++] = t->key;
- }
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
deleted file mode 100644
index 9d73d6999..000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con,
- buffer *glusterfs_path, buffer *name,
- void *buf, size_t size,
- stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am b/mod_glusterfs/lighttpd/1.5/Makefile.am
deleted file mode 100644
index eda329111..000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff b/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
deleted file mode 100644
index 375696b5d..000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.5/README.txt b/mod_glusterfs/lighttpd/1.5/README.txt
deleted file mode 100644
index bdbdfffbc..000000000
--- a/mod_glusterfs/lighttpd/1.5/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.5, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.5 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.5 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.5/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.5/
-# cd /home/glusterfs/lighttpd-1.5
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
deleted file mode 100644
index 67f7a7eac..000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
+++ /dev/null
@@ -1,1476 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include "joblist.h"
-#include "http_req_range.h"
-#include "connections.h"
-#include "configfile.h"
-
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/* enables debug code for testing if all nodes in the stat-cache as accessable */
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/* Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE results in a network_backend_write of the read data*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-
-/* plugin config for all request/connections */
-
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
-
- /* FIXME: its a pointer, hence cant be short */
- unsigned long handle;
-} plugin_config;
-
-static network_status_t (*network_backend_write)(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
- http_req_range *ranges;
- plugin_config conf;
-} plugin_data;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
- connection *con;
- server *srv;
- plugin_data *p;
-
- union {
- struct {
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_read_buf_t *buf;
- }readv;
-
- struct {
- buffer *name;
- buffer *hash_key;
- size_t size;
- }lookup;
- }fop;
-} glusterfs_async_local_t;
-
-typedef struct {
- unsigned long fd;
- buffer *glusterfs_path;
- void *buf;
- off_t response_content_length;
- int prefix;
-}mod_glusterfs_ctx_t;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_read_buf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-static stat_cache_entry *
-stat_cache_entry_init(void)
-{
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-int chunkqueue_append_glusterfs_mem (chunkqueue *cq, const char * mem, size_t len) {
- buffer *buf = NULL;
-
- buf = chunkqueue_get_append_buffer (cq);
-
- if (buf->ptr)
- free (buf->ptr);
-
- buf->used = len + 1;
- buf->ptr = (char *)mem;
- buf->size = len;
-
- return 0;
-}
-
-static int
-glusterfs_lookup_async_cbk (int op_ret,
- int op_errno,
- void *buf,
- struct stat *st,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- mod_glusterfs_ctx_t *ctx = NULL;
- ctx = local->con->plugin_ctx[local->p->id];
-
- assert (ctx->buf== buf);
-
- if (op_ret || !(S_ISREG (st->st_mode) && (size_t)st->st_size <= local->fop.lookup.size)) {
-
- free (ctx->buf);
- ctx->buf = NULL;
-
- if (op_ret) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- free (ctx);
- local->con->plugin_ctx[local->p->id] = NULL;
-
- if (op_errno == ENOENT)
- local->con->http_status = 404;
- else
- local->con->http_status = 403;
- }
- }
-
- if (!op_ret) {
- stat_cache_entry *sce = NULL;
- stat_cache *sc = local->srv->stat_cache;
-
- sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, local->fop.lookup.hash_key);
-
- if (!sce) {
- sce = stat_cache_entry_init();
-
- buffer_copy_string_buffer(sce->name, local->fop.lookup.name);
- g_hash_table_insert(sc->files, buffer_init_string(BUF_STR(local->fop.lookup.hash_key)), sce);
- }
-
- sce->state = STAT_CACHE_ENTRY_STAT_FINISHED;
- sce->stat_ts = time (NULL);
- memcpy (&sce->st, st, sizeof (*st));
- }
-
- g_async_queue_push (local->srv->joblist_queue, local->con);
- /*
- joblist_append (local->srv, local->con);
- kill (getpid(), SIGUSR1);
- */
- free (local);
- return 0;
-}
-
-static handler_t
-glusterfs_stat_cache_get_entry_async (server *srv,
- connection *con,
- plugin_data *p,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- glusterfs_async_local_t *local = NULL;
-
- *ret_sce = NULL;
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- if ((sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, sc->hash_key))) {
- /* know this entry already */
-
- if (sce->state == STAT_CACHE_ENTRY_STAT_FINISHED &&
- !buf) {
- /* verify that this entry is still fresh */
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
- }
- }
-
-
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is better
- *
- * */
-
- /* pass a job to the stat-queue */
-
- local = calloc (1, sizeof (*local));
- /* ERR_ABORT (local); */
- local->con = con;
- local->srv = srv;
- local->p = p;
- local->fop.lookup.name = buffer_init_buffer (name);
- local->fop.lookup.hash_key = buffer_init_buffer (sc->hash_key);
- local->fop.lookup.size = size;
-
- if (glusterfs_lookup_async ((libglusterfs_handle_t )p->conf.handle, glusterfs_path->ptr, buf, size, glusterfs_lookup_async_cbk, (void *) local)) {
- free (local);
- return HANDLER_ERROR;
- }
-
- return HANDLER_WAIT_FOR_EVENT;
-}
-
-int
-mod_glusterfs_readv_async_cbk (glusterfs_read_buf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->fop.readv.async_read_complete = 1;
- local->fop.readv.buf = buf;
-
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-network_status_t
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- unsigned long fd = (unsigned long)glusterfs_chunk->file.name;
- network_status_t ret;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return NETWORK_STATUS_FATAL_ERROR;
- }
-
- do {
- glusterfs_read_buf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.fop.readv.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.op_ret = local.fop.readv.buf->op_ret;
- local.op_errno = local.fop.readv.buf->op_errno;
-
- local.fop.readv.async_read_complete = 0;
- buf = local.fop.readv.buf;
-
- if ((int)length < 0)
- complete = (local.fop.readv.buf->op_ret <= 0);
- else {
- local.fop.readv.read_bytes += local.fop.readv.buf->op_ret;
- complete = ((local.fop.readv.read_bytes == length) || (local.fop.readv.buf->op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = chunkqueue_get_append_buffer (cq);
-
- nw_write_buf->used = nw_write_buf->size = buf->vector[i].iov_len + 1;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- // buffer_copy_memory (nw_write_buf, buf->vector[i].iov_base, buf->vector[i].iov_len + 1);
- offset += local.op_ret;
- }
-
- ret = network_backend_write (srv, con, con->sock, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.fop.readv.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (qf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start = (char *)gf_cq;
- return ret;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? NETWORK_STATUS_FATAL_ERROR : NETWORK_STATUS_SUCCESS);
-}
-
-network_status_t mod_glusterfs_network_backend_write(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
- network_status_t ret;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- ret = network_backend_write (srv, con, sock, cq);
-
- prev->next = c;
- if (ret != NETWORK_STATUS_SUCCESS) {
- cq->first = first;
- return ret;
- }
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- size_t len;
- mod_glusterfs_chunkqueue *gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- ret = network_backend_write (srv, con, sock, gf_cq->cq);
-
- if ((len = (size_t)chunkqueue_written (gf_cq->cq)) != gf_cq->length) {
- gf_cq->length -= len;
- cq->first = first;
- chunkqueue_remove_finished_chunks (gf_cq->cq);
- return ret;
- }
-
- for (tmp = gf_cq->cq->first ; tmp; tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- ret = mod_glusterfs_read_async (srv, con, c); //c->file.fd, c->file.start, -1);//c->file.length);
- if (c->file.mmap.start) {
- /* pending chunkqueue from mod_glusterfs_read_async to be written to network */
- cq->first = first;
- return ret;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- ret = network_backend_write (srv, con, sock, cq);
-
- cq->first = first;
-
- return ret;
-}
-
-#if 0
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- c->file.start = offset;
- c->file.length = len;
- /* buffer_free (c->file.name); */
-
- /* identify chunk as glusterfs related */
- c->file.mmap.start = MAP_FAILED;
- /* c->file.mmap.length = c->file.mmap.offset = len;*/
-
- return 0;
-}
-#endif
-
-int chunkqueue_append_dummy_mem_chunk (chunkqueue *cq, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem->used = len + 1;
- c->offset = len;
- c->mem->ptr = NULL;
-
- return 0;
-}
-
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- UNUSED (srv);
- p = calloc(1, sizeof(*p));
- /* ERR_ABORT (p); */
- network_backend_write = NULL;
- p->ranges = http_request_range_init();
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
- http_request_range_free (p->ranges);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET, T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1, srv->config_context->used * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage); */
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
- s->document_root = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
-
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv, ((data_config *)srv->config_context->data[i])->value, cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con, plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.exclude_exts = NULL;
-
- /* skip the first, the global context */
- /* glusterfs related config can only occur inside $HTTP["url"] == "<glusterfs-prefix>" */
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con, plugin_data *p) {
- int multipart = 0;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- buffer *range = NULL;
- http_req_range *ranges, *r;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
- size_t size = 0;
-
- if (!ctx) {
- return -1;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("Range")))) {
- range = ds->value;
- } else {
- /* we don't have a Range header */
-
- return -1;
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- SEGFAULT();
- }
-
- ctx->response_content_length = con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Content-Type")))) {
- content_type = ds->value;
- }
-
- /* start the range-header parser
- * bytes=<num> */
-
- ranges = p->ranges;
- http_request_range_reset(ranges);
- switch (http_request_range_parse(range, ranges)) {
- case PARSE_ERROR:
- return -1; /* no range valid Range Header */
- case PARSE_SUCCESS:
- break;
- default:
- TRACE("%s", "foobar");
- return -1;
- }
-
- if (ranges->next) {
- multipart = 1;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- /* patch the '-1' */
- for (r = ranges; r; r = r->next) {
- if (r->start == -1) {
- /* -<end>
- *
- * the last <end> bytes */
- r->start = sce->st.st_size - r->end;
- r->end = sce->st.st_size - 1;
- }
- if (r->end == -1) {
- /* <start>-
- * all but the first <start> bytes */
-
- r->end = sce->st.st_size - 1;
- }
-
- if (r->end > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos not present or > size-of-file
- * take the size-of-file
- *
- * */
- r->end = sce->st.st_size - 1;
- }
-
- if (r->start > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if first-byte-pos > file-size, 416
- */
-
- con->http_status = 416;
- return -1;
- }
-
- if (r->start > r->end) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos is present, it has to be >= first-byte-pos
- *
- * invalid ranges have to be handle as no Range specified
- * */
-
- return -1;
- }
- }
-
- if (r) {
- /* we ran into an range violation */
- return -1;
- }
-
- if (multipart) {
- buffer *b;
- for (r = ranges; r; r = r->next) {
- /* write boundary-header */
-
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: bytes ");
- buffer_append_off_t(b, r->start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, r->end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- if ((size_t)sce->st.st_size > size) {
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- } else {
- chunkqueue_append_mem (con->send, ((char *)ctx->buf) + r->start, r->end - r->start + 1);
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
- }
-
- /* add boundary end */
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(p->range_buf));
-
- } else {
- r = ranges;
-
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, r->start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, r->end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"), CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- size_t size = 0;
- handler_t ret = 0;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s", "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.handle <= 0) {
- glusterfs_init_ctx_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- p->conf.handle = (unsigned long)glusterfs_init (&ctx);
-
- if (p->conf.handle <= 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs", "glusterfs initialization failed, please check your configuration. Glusterfs logfile ", p->conf.logfile, "might contain details");
- return HANDLER_FINISHED;
- }
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path, p->conf.document_root);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path, con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path, plugin_ctx->glusterfs_path);
-
- ret = glusterfs_stat_cache_get_entry_async (srv, con, p, plugin_ctx->glusterfs_path, con->physical.path, plugin_ctx->buf, size, &sce);
-
- if (ret == HANDLER_ERROR) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
-
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- con->http_status = 500;
- ret = HANDLER_FINISHED;
- }
-
- return ret;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- unsigned long fd;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr)
- return HANDLER_GO_ON;
-
- if (!ctx) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr, strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- con->http_status = 403;
-
- /* this might happen if the sce is removed from stat-cache after a successful glusterfs_lookup */
- if (ctx) {
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
-
- fd = glusterfs_open ((libglusterfs_handle_t ) ((unsigned long)p->conf.handle), ctx->glusterfs_path->ptr, O_RDONLY, 0);
-
- if (!fd) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- con->http_status = 403;
- return HANDLER_FINISHED;
- }
- ctx->fd = fd;
- }
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- access denied due symlink restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb", "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, CONST_STR_LEN("Content-Type"))) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also announce that
- * this header field might change in the seconds few requests
- *
- * This should fix the aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_STR_LEN("application/octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con, CONST_STR_LEN("Accept-Ranges"), CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0 && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers, "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con, CONST_STR_LEN("ETag"), CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con, mtime)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (NULL != array_get_element(con->request.headers, CONST_STR_LEN("Range")) && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Last-Modified")))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("If-Range")))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->send->is_closed = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size < (size_t)sce->st.st_size) {
- chunkqueue_append_glusterfs_file (con->send_raw, fd, 0, sce->st.st_size);
- con->send_raw->bytes_in += sce->st.st_size;
- chunkqueue_append_dummy_mem_chunk (con->send, sce->st.st_size);
- } else {
- if (!ctx->buf) {
- con->http_status = 404;
- return HANDLER_ERROR;
- }
- chunkqueue_append_glusterfs_mem (con->send, ctx->buf, sce->st.st_size);
- ctx->buf = NULL;
- }
- ctx->response_content_length = con->response.content_length = sce->st.st_size;
-
- con->send->is_closed = 1;
- con->send->bytes_in = sce->st.st_size;
-
- return HANDLER_FINISHED;
-}
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_response_done) {
- plugin_data *p = p_d;
- UNUSED (srv);
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- con->plugin_ctx[p->id] = NULL;
- if (ctx->glusterfs_path) {
- free (ctx->glusterfs_path);
- }
-
- free (ctx);
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_start_backend = mod_glusterfs_subrequest;
- p->handle_response_done = mod_glusterfs_response_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
deleted file mode 100644
index 5f9bb2c5b..000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con, libglusterfs_handle_t handle, buffer *glusterfs_path, buffer *name, void *buf, size_t size, stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/Makefile.am b/mod_glusterfs/lighttpd/Makefile.am
deleted file mode 100644
index c934412b3..000000000
--- a/mod_glusterfs/lighttpd/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = 1.4 1.5
-
-CLEANFILES =
diff --git a/rfc.sh b/rfc.sh
index b06ac512d..baf700495 100755
--- a/rfc.sh
+++ b/rfc.sh
@@ -1,7 +1,7 @@
#!/bin/sh -e
-branch="master";
+branch="development";
set_hooks_commit_msg()
@@ -16,6 +16,9 @@ set_hooks_commit_msg()
curl -o $f $u || wget -O $f $u;
chmod +x .git/hooks/commit-msg;
+
+ # Let the 'Change-Id: ' header get assigned on first run of rfc.sh
+ GIT_EDITOR=true git commit --amend;
}
@@ -82,13 +85,13 @@ assert_diverge()
main()
{
+ set_hooks_commit_msg;
+
if [ -e "$1" ]; then
editor_mode "$@";
return;
fi
- set_hooks_commit_msg;
-
rebase_changes;
assert_diverge;
diff --git a/rpc/rpc-lib/src/Makefile.am b/rpc/rpc-lib/src/Makefile.am
index 8b087301c..f19c3c8a4 100644
--- a/rpc/rpc-lib/src/Makefile.am
+++ b/rpc/rpc-lib/src/Makefile.am
@@ -1,16 +1,19 @@
lib_LTLIBRARIES = libgfrpc.la
libgfrpc_la_SOURCES = auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c \
- rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c
+ rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c \
+ rpc-drc.c
libgfrpc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = rpcsvc.h rpc-transport.h xdr-common.h xdr-rpc.h xdr-rpcclnt.h \
- rpc-clnt.h rpcsvc-common.h protocol-common.h
+ rpc-clnt.h rpcsvc-common.h protocol-common.h rpc-drc.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src \
- -DRPC_TRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport\"
+ -DRPC_TRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport\" \
+ -I$(top_srcdir)/contrib/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES = *~
diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c
index 5f41c8296..db488434c 100644
--- a/rpc/rpc-lib/src/auth-glusterfs.c
+++ b/rpc/rpc-lib/src/auth-glusterfs.c
@@ -64,9 +64,9 @@ int auth_glusterfs_authenticate (rpcsvc_request_t *req, void *priv)
struct auth_glusterfs_parms au = {0,};
int ret = RPCSVC_AUTH_REJECT;
- int gidcount = 0;
int j = 0;
int i = 0;
+ int gidcount = 0;
if (!req)
return ret;
@@ -96,9 +96,27 @@ int auth_glusterfs_authenticate (rpcsvc_request_t *req, void *priv)
goto err;
}
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount,
+ sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log ("auth-glusterfs", GF_LOG_WARNING,
+ "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
for (gidcount = 0; gidcount < au.ngrps; ++gidcount)
req->auxgids[gidcount] = au.groups[gidcount];
+ RPC_AUTH_ROOT_SQUASH(req);
+
gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
", gid: %d, owner: %s",
req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
@@ -201,22 +219,38 @@ int auth_glusterfs_v2_authenticate (rpcsvc_request_t *req, void *priv)
goto err;
}
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount,
+ sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log ("auth-glusterfs-v2", GF_LOG_WARNING,
+ "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
for (i = 0; i < req->auxgidcount; ++i)
req->auxgids[i] = au.groups.groups_val[i];
for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
+ RPC_AUTH_ROOT_SQUASH(req);
+
gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
", gid: %d, owner: %s",
req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
ret = RPCSVC_AUTH_ACCEPT;
err:
/* TODO: instead use alloca() for these variables */
- if (au.groups.groups_val)
- free (au.groups.groups_val);
- if (au.lk_owner.lk_owner_val)
- free (au.lk_owner.lk_owner_val);
+ free (au.groups.groups_val);
+ free (au.lk_owner.lk_owner_val);
return ret;
}
diff --git a/rpc/rpc-lib/src/auth-unix.c b/rpc/rpc-lib/src/auth-unix.c
index 6251d60a8..fa5f0576e 100644
--- a/rpc/rpc-lib/src/auth-unix.c
+++ b/rpc/rpc-lib/src/auth-unix.c
@@ -42,6 +42,7 @@ int auth_unix_authenticate (rpcsvc_request_t *req, void *priv)
if (!req)
return ret;
+ req->auxgids = req->auxgidsmall;
ret = xdr_to_auth_unix_cred (req->cred.authdata, req->cred.datalen,
&aup, machname, req->auxgids);
if (ret == -1) {
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index e2815d8c1..8bef906cc 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -56,6 +56,9 @@ enum gf_fop_procnum {
GFS3_OP_RELEASE,
GFS3_OP_RELEASEDIR,
GFS3_OP_FREMOVEXATTR,
+ GFS3_OP_FALLOCATE,
+ GFS3_OP_DISCARD,
+ GFS3_OP_ZEROFILL,
GFS3_OP_MAXVALUE,
} ;
@@ -94,8 +97,10 @@ enum gf_probe_resp {
GF_PROBE_FRIEND,
GF_PROBE_ANOTHER_CLUSTER,
GF_PROBE_VOLUME_CONFLICT,
+ GF_PROBE_SAME_UUID,
GF_PROBE_UNKNOWN_PEER,
- GF_PROBE_ADD_FAILED
+ GF_PROBE_ADD_FAILED,
+ GF_PROBE_QUORUM_NOT_MET
};
enum gf_deprobe_resp {
@@ -103,7 +108,8 @@ enum gf_deprobe_resp {
GF_DEPROBE_LOCALHOST,
GF_DEPROBE_NOT_FRIEND,
GF_DEPROBE_BRICK_EXIST,
- GF_DEPROBE_FRIEND_DOWN
+ GF_DEPROBE_FRIEND_DOWN,
+ GF_DEPROBE_QUORUM_NOT_MET,
};
enum gf_cbk_procnum {
@@ -150,6 +156,11 @@ enum gluster_cli_procnum {
GLUSTER_CLI_STATEDUMP_VOLUME,
GLUSTER_CLI_LIST_VOLUME,
GLUSTER_CLI_CLRLOCKS_VOLUME,
+ GLUSTER_CLI_UUID_RESET,
+ GLUSTER_CLI_UUID_GET,
+ GLUSTER_CLI_COPY_FILE,
+ GLUSTER_CLI_SYS_EXEC,
+ GLUSTER_CLI_SNAP,
GLUSTER_CLI_MAXVALUE,
};
@@ -181,9 +192,17 @@ enum glusterd_brick_procnum {
GLUSTERD_BRICK_XLATOR_DEFRAG,
GLUSTERD_NODE_PROFILE,
GLUSTERD_NODE_STATUS,
+ GLUSTERD_VOLUME_BARRIER_OP,
GLUSTERD_BRICK_MAXVALUE,
};
+enum glusterd_mgmt_hndsk_procnum {
+ GD_MGMT_HNDSK_NULL,
+ GD_MGMT_HNDSK_VERSIONS,
+ GD_MGMT_HNDSK_VERSIONS_ACK,
+ GD_MGMT_HNDSK_MAXVALUE,
+};
+
typedef enum {
GF_AFR_OP_INVALID,
GF_AFR_OP_HEAL_INDEX,
@@ -191,16 +210,25 @@ typedef enum {
GF_AFR_OP_INDEX_SUMMARY,
GF_AFR_OP_HEALED_FILES,
GF_AFR_OP_HEAL_FAILED_FILES,
- GF_AFR_OP_SPLIT_BRAIN_FILES
+ GF_AFR_OP_SPLIT_BRAIN_FILES,
+ GF_AFR_OP_STATISTICS,
+ GF_AFR_OP_STATISTICS_HEAL_COUNT,
+ GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA,
} gf_xl_afr_op_t ;
-enum gf_hdsk_event_notify_op {
- GF_EN_DEFRAG_STATUS,
- GF_EN_MAX,
+enum glusterd_mgmt_v3_procnum {
+ GLUSTERD_MGMT_V3_NULL, /* 0 */
+ GLUSTERD_MGMT_V3_LOCK,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ GLUSTERD_MGMT_V3_BRICK_OP,
+ GLUSTERD_MGMT_V3_COMMIT,
+ GLUSTERD_MGMT_V3_POST_VALIDATE,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ GLUSTERD_MGMT_V3_MAXVALUE,
};
#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
-#define GLUSTER_HNDSK_VERSION 2 /* 0.0.1 */
+#define GLUSTER_HNDSK_VERSION 2 /* 0.0.2 */
#define GLUSTER_PMAP_PROGRAM 34123456
#define GLUSTER_PMAP_VERSION 1
@@ -208,9 +236,9 @@ enum gf_hdsk_event_notify_op {
#define GLUSTER_CBK_PROGRAM 52743234 /* Completely random */
#define GLUSTER_CBK_VERSION 1 /* 0.0.1 */
-#define GLUSTER3_1_FOP_PROGRAM 1298437 /* Completely random */
-#define GLUSTER3_1_FOP_VERSION 330 /* 3.3.0 */
-#define GLUSTER3_1_FOP_PROCCNT GFS3_OP_MAXVALUE
+#define GLUSTER_FOP_PROGRAM 1298437 /* Completely random */
+#define GLUSTER_FOP_VERSION 330 /* 3.3.0 */
+#define GLUSTER_FOP_PROCCNT GFS3_OP_MAXVALUE
/* Second version */
#define GD_MGMT_PROGRAM 1238433 /* Completely random */
@@ -225,4 +253,12 @@ enum gf_hdsk_event_notify_op {
#define GD_BRICK_PROGRAM 4867634 /*Completely random*/
#define GD_BRICK_VERSION 2
+/* Third version */
+#define GD_MGMT_V3_PROGRAM 2210013 /* Completely random */
+#define GD_MGMT_V3_VERSION 3
+
+/* OP-VERSION handshake */
+#define GD_MGMT_HNDSK_PROGRAM 1239873 /* Completely random */
+#define GD_MGMT_HNDSK_VERSION 1
+
#endif /* !_PROTOCOL_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index c04510586..ac98a5c91 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -66,8 +66,8 @@ _is_lock_fop (struct saved_frame *sframe)
{
int fop = 0;
- if (SFRAME_GET_PROGNUM (sframe) == GLUSTER3_1_FOP_PROGRAM &&
- SFRAME_GET_PROGVER (sframe) == GLUSTER3_1_FOP_VERSION)
+ if (SFRAME_GET_PROGNUM (sframe) == GLUSTER_FOP_PROGRAM &&
+ SFRAME_GET_PROGVER (sframe) == GLUSTER_FOP_VERSION)
fop = SFRAME_GET_PROCNUM (sframe);
return ((fop == GFS3_OP_LK) ||
@@ -144,9 +144,8 @@ call_bail (void *data)
struct saved_frame *saved_frame = NULL;
struct saved_frame *trav = NULL;
struct saved_frame *tmp = NULL;
- struct tm frame_sent_tm;
char frame_sent[256] = {0,};
- struct timeval timeout = {0,};
+ struct timespec timeout = {0,};
struct iovec iov = {0,};
GF_VALIDATE_OR_GOTO ("client", data, out);
@@ -164,7 +163,7 @@ call_bail (void *data)
call-once timer */
if (conn->timer) {
timeout.tv_sec = 10;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
gf_timer_call_cancel (clnt->ctx, conn->timer);
conn->timer = gf_timer_call_after (clnt->ctx,
@@ -174,7 +173,8 @@ call_bail (void *data)
if (conn->timer == NULL) {
gf_log (conn->trans->name, GF_LOG_WARNING,
- "Cannot create bailout timer");
+ "Cannot create bailout timer for %s",
+ conn->trans->peerinfo.identifier);
}
}
@@ -191,21 +191,21 @@ call_bail (void *data)
pthread_mutex_unlock (&conn->lock);
list_for_each_entry_safe (trav, tmp, &list, list) {
- localtime_r (&trav->saved_at.tv_sec, &frame_sent_tm);
- strftime (frame_sent, 32, "%Y-%m-%d %H:%M:%S", &frame_sent_tm);
+ gf_time_fmt (frame_sent, sizeof frame_sent,
+ trav->saved_at.tv_sec, gf_timefmt_FT);
snprintf (frame_sent + strlen (frame_sent),
256 - strlen (frame_sent),
".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
gf_log (conn->trans->name, GF_LOG_ERROR,
- "bailing out frame type(%s) op(%s(%d)) xid = 0x%ux "
- "sent = %s. timeout = %d",
+ "bailing out frame type(%s) op(%s(%d)) xid = 0x%x "
+ "sent = %s. timeout = %d for %s",
trav->rpcreq->prog->progname,
(trav->rpcreq->prog->procnames) ?
trav->rpcreq->prog->procnames[trav->rpcreq->procnum] :
"--",
trav->rpcreq->procnum, trav->rpcreq->xid, frame_sent,
- conn->frame_timeout);
+ conn->frame_timeout, conn->trans->peerinfo.identifier);
clnt = rpc_clnt_ref (clnt);
trav->rpcreq->rpc_status = -1;
@@ -227,7 +227,7 @@ __save_frame (struct rpc_clnt *rpc_clnt, call_frame_t *frame,
struct rpc_req *rpcreq)
{
rpc_clnt_connection_t *conn = NULL;
- struct timeval timeout = {0, };
+ struct timespec timeout = {0, };
struct saved_frame *saved_frame = NULL;
conn = &rpc_clnt->conn;
@@ -241,7 +241,7 @@ __save_frame (struct rpc_clnt *rpc_clnt, call_frame_t *frame,
/* TODO: make timeout configurable */
if (conn->timer == NULL) {
timeout.tv_sec = 10;
- timeout.tv_usec = 0;
+ timeout.tv_nsec = 0;
conn->timer = gf_timer_call_after (rpc_clnt->ctx,
timeout,
call_bail,
@@ -340,20 +340,16 @@ out:
void
saved_frames_unwind (struct saved_frames *saved_frames)
{
- struct rpc_clnt *clnt = NULL;
struct saved_frame *trav = NULL;
struct saved_frame *tmp = NULL;
- struct tm *frame_sent_tm = NULL;
- char timestr[256] = {0,};
-
+ char timestr[1024] = {0,};
struct iovec iov = {0,};
list_splice_init (&saved_frames->lk_sf.list, &saved_frames->sf.list);
list_for_each_entry_safe (trav, tmp, &saved_frames->sf.list, list) {
- frame_sent_tm = localtime (&trav->saved_at.tv_sec);
- strftime (timestr, sizeof(timestr), "%Y-%m-%d %H:%M:%S",
- frame_sent_tm);
+ gf_time_fmt (timestr, sizeof timestr,
+ trav->saved_at.tv_sec, gf_timefmt_FT);
snprintf (timestr + strlen (timestr),
sizeof(timestr) - strlen (timestr),
".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
@@ -364,7 +360,7 @@ saved_frames_unwind (struct saved_frames *saved_frames)
gf_log_callingfn (trav->rpcreq->conn->trans->name,
GF_LOG_ERROR,
"forced unwinding frame type(%s) op(%s(%d)) "
- "called at %s (xid=0x%ux)",
+ "called at %s (xid=0x%x)",
trav->rpcreq->prog->progname,
((trav->rpcreq->prog->procnames) ?
trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
@@ -373,14 +369,12 @@ saved_frames_unwind (struct saved_frames *saved_frames)
trav->rpcreq->xid);
saved_frames->count--;
- clnt = rpc_clnt_ref (trav->rpcreq->conn->rpc_clnt);
trav->rpcreq->rpc_status = -1;
trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
rpc_clnt_reply_deinit (trav->rpcreq,
trav->rpcreq->conn->rpc_clnt->reqpool);
- clnt = rpc_clnt_unref (clnt);
list_del_init (&trav->list);
mem_put (trav);
}
@@ -404,7 +398,7 @@ rpc_clnt_reconnect (void *trans_ptr)
{
rpc_transport_t *trans = NULL;
rpc_clnt_connection_t *conn = NULL;
- struct timeval tv = {0, 0};
+ struct timespec ts = {0, 0};
int32_t ret = 0;
struct rpc_clnt *clnt = NULL;
@@ -423,23 +417,15 @@ rpc_clnt_reconnect (void *trans_ptr)
conn->reconnect = 0;
if (conn->connected == 0) {
- tv.tv_sec = 3;
+ ts.tv_sec = 3;
+ ts.tv_nsec = 0;
gf_log (trans->name, GF_LOG_TRACE,
"attempting reconnect");
ret = rpc_transport_connect (trans,
conn->config.remote_port);
- /* Every time there is a disconnection, processes
- should try to connect to 'glusterd' (ie, default
- port) or whichever port given as 'option remote-port'
- in volume file. */
- /* Below code makes sure the (re-)configured port lasts
- for just one successful attempt */
- if (!ret)
- conn->config.remote_port = 0;
-
conn->reconnect =
- gf_timer_call_after (clnt->ctx, tv,
+ gf_timer_call_after (clnt->ctx, ts,
rpc_clnt_reconnect,
trans);
} else {
@@ -460,7 +446,7 @@ rpc_clnt_reconnect (void *trans_ptr)
int
rpc_clnt_fill_request_info (struct rpc_clnt *clnt, rpc_request_info_t *info)
{
- struct saved_frame saved_frame = {{}, 0};
+ struct saved_frame saved_frame;
int ret = -1;
pthread_mutex_lock (&clnt->conn.lock);
@@ -677,15 +663,13 @@ rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg,
}
gf_log (conn->trans->name, GF_LOG_TRACE,
- "received rpc message (RPC XID: 0x%ux"
+ "received rpc message (RPC XID: 0x%x"
" Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)",
saved_frame->rpcreq->xid,
saved_frame->rpcreq->prog->progname,
saved_frame->rpcreq->prog->progver,
saved_frame->rpcreq->procnum, conn->trans->name);
- req->rpc_status = 0;
-
out:
if (ret != 0) {
req->rpc_status = -1;
@@ -742,7 +726,8 @@ rpc_clnt_handle_cbk (struct rpc_clnt *clnt, rpc_transport_pollin_t *msg)
if (found && (procnum < program->numactors) &&
(program->actors[procnum].actor)) {
- program->actors[procnum].actor (&progmsg);
+ program->actors[procnum].actor (clnt, program->mydata,
+ &progmsg);
}
out:
@@ -836,6 +821,9 @@ out:
return;
}
+static void
+rpc_clnt_destroy (struct rpc_clnt *rpc);
+
int
rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
rpc_transport_event_t event, void *data, ...)
@@ -845,7 +833,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
int ret = -1;
rpc_request_info_t *req_info = NULL;
rpc_transport_pollin_t *pollin = NULL;
- struct timeval tv = {0, };
+ struct timespec ts = {0, };
conn = mydata;
if (conn == NULL) {
@@ -864,10 +852,11 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
{
if (!conn->rpc_clnt->disabled
&& (conn->reconnect == NULL)) {
- tv.tv_sec = 10;
+ ts.tv_sec = 10;
+ ts.tv_nsec = 0;
conn->reconnect =
- gf_timer_call_after (clnt->ctx, tv,
+ gf_timer_call_after (clnt->ctx, ts,
rpc_clnt_reconnect,
conn->trans);
}
@@ -881,9 +870,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
}
case RPC_TRANSPORT_CLEANUP:
- /* this event should not be received on a client for, a
- * transport is only disconnected, but never destroyed.
- */
+ rpc_clnt_destroy (clnt);
ret = 0;
break;
@@ -927,6 +914,14 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
case RPC_TRANSPORT_CONNECT:
{
+ /* Every time there is a disconnection, processes
+ should try to connect to 'glusterd' (ie, default
+ port) or whichever port given as 'option remote-port'
+ in volume file. */
+ /* Below code makes sure the (re-)configured port lasts
+ for just one successful attempt */
+ conn->config.remote_port = 0;
+
if (clnt->notifyfn)
ret = clnt->notifyfn (clnt, clnt->mydata,
RPC_CLNT_CONNECT, NULL);
@@ -953,7 +948,7 @@ rpc_clnt_connection_deinit (rpc_clnt_connection_t *conn)
}
-inline int
+static inline int
rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx,
dict_t *options, char *name)
{
@@ -1326,7 +1321,7 @@ out:
int
rpcclnt_cbk_program_register (struct rpc_clnt *clnt,
- rpcclnt_cb_program_t *program)
+ rpcclnt_cb_program_t *program, void *mydata)
{
int ret = -1;
char already_registered = 0;
@@ -1366,6 +1361,8 @@ rpcclnt_cbk_program_register (struct rpc_clnt *clnt,
memcpy (tmp, program, sizeof (*tmp));
INIT_LIST_HEAD (&tmp->program);
+ tmp->mydata = mydata;
+
pthread_mutex_lock (&clnt->lock);
{
list_add_tail (&tmp->program, &clnt->programs);
@@ -1481,10 +1478,6 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
if (conn->connected == 0) {
ret = rpc_transport_connect (conn->trans,
conn->config.remote_port);
- /* Below code makes sure the (re-)configured port lasts
- for just one successful connect attempt */
- if (!ret)
- conn->config.remote_port = 0;
}
ret = rpc_transport_submit_request (rpc->conn.trans,
@@ -1492,19 +1485,18 @@ rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
if (ret == -1) {
gf_log (conn->trans->name, GF_LOG_WARNING,
"failed to submit rpc-request "
- "(XID: 0x%ux Program: %s, ProgVers: %d, "
+ "(XID: 0x%x Program: %s, ProgVers: %d, "
"Proc: %d) to rpc-transport (%s)", rpcreq->xid,
rpcreq->prog->progname, rpcreq->prog->progver,
rpcreq->procnum, rpc->conn.trans->name);
}
if ((ret >= 0) && frame) {
- gettimeofday (&conn->last_sent, NULL);
/* Save the frame in queue */
__save_frame (rpc, frame, rpcreq);
gf_log ("rpc-clnt", GF_LOG_TRACE, "submitted request "
- "(XID: 0x%ux Program: %s, ProgVers: %d, "
+ "(XID: 0x%x Program: %s, ProgVers: %d, "
"Proc: %d) to rpc-transport (%s)", rpcreq->xid,
rpcreq->prog->progname, rpcreq->prog->progver,
rpcreq->procnum, rpc->conn.trans->name);
@@ -1553,18 +1545,21 @@ rpc_clnt_ref (struct rpc_clnt *rpc)
static void
-rpc_clnt_destroy (struct rpc_clnt *rpc)
+rpc_clnt_trigger_destroy (struct rpc_clnt *rpc)
{
if (!rpc)
return;
- if (rpc->conn.trans) {
- rpc_transport_unregister_notify (rpc->conn.trans);
- rpc_transport_disconnect (rpc->conn.trans);
- rpc_transport_unref (rpc->conn.trans);
- }
+ rpc_clnt_disable (rpc);
+ rpc_transport_unref (rpc->conn.trans);
+}
+
+static void
+rpc_clnt_destroy (struct rpc_clnt *rpc)
+{
+ if (!rpc)
+ return;
- rpc_clnt_reconnect_cleanup (&rpc->conn);
saved_frames_destroy (rpc->conn.saved_frames);
pthread_mutex_destroy (&rpc->lock);
pthread_mutex_destroy (&rpc->conn.lock);
@@ -1591,13 +1586,36 @@ rpc_clnt_unref (struct rpc_clnt *rpc)
}
pthread_mutex_unlock (&rpc->lock);
if (!count) {
- rpc_clnt_destroy (rpc);
+ rpc_clnt_trigger_destroy (rpc);
return NULL;
}
return rpc;
}
+/* Report whether the rpc client has been disabled.
+ *
+ * Returns the value of rpc->disabled, read while holding rpc->conn.lock,
+ * or 0 when rpc is NULL.
+ */
+char
+rpc_clnt_is_disabled (struct rpc_clnt *rpc)
+{
+        char    is_disabled = 0;
+
+        if (rpc == NULL)
+                return 0;
+
+        pthread_mutex_lock (&rpc->conn.lock);
+        is_disabled = rpc->disabled;
+        pthread_mutex_unlock (&rpc->conn.lock);
+
+        return is_disabled;
+}
+
void
rpc_clnt_disable (struct rpc_clnt *rpc)
{
@@ -1667,7 +1685,7 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config)
if (strcmp (rpc->conn.config.remote_host,
config->remote_host))
gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "changing port to %s (from %s)",
+ "changing hostname to %s (from %s)",
config->remote_host,
rpc->conn.config.remote_host);
FREE (rpc->conn.config.remote_host);
@@ -1680,54 +1698,3 @@ rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config)
rpc->conn.config.remote_host = gf_strdup (config->remote_host);
}
}
-
-int
-rpc_clnt_transport_unix_options_build (dict_t **options, char *filepath)
-{
- dict_t *dict = NULL;
- char *fpath = NULL;
- int ret = -1;
-
- GF_ASSERT (filepath);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- fpath = gf_strdup (filepath);
- if (!fpath) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynstr (dict, "transport.socket.connect-path", fpath);
- if (ret)
- goto out;
-
- ret = dict_set_str (dict, "transport.address-family", "unix");
- if (ret)
- goto out;
-
- ret = dict_set_str (dict, "transport.socket.nodelay", "off");
- if (ret)
- goto out;
-
- ret = dict_set_str (dict, "transport-type", "socket");
- if (ret)
- goto out;
-
- ret = dict_set_str (dict, "transport.socket.keepalive", "off");
- if (ret)
- goto out;
-
- *options = dict;
-out:
- if (ret) {
- if (fpath)
- GF_FREE (fpath);
- if (dict)
- dict_unref (dict);
- }
- return ret;
-}
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index e7335e388..584963ad0 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -78,7 +78,7 @@ typedef struct rpc_clnt_program {
int numproc;
} rpc_clnt_prog_t;
-typedef int (*rpcclnt_cb_fn) (void *data);
+typedef int (*rpcclnt_cb_fn) (struct rpc_clnt *rpc, void *mydata, void *data);
/* The descriptor for each procedure/actor that runs
* over the RPC service.
@@ -106,6 +106,9 @@ typedef struct rpcclnt_cb_program {
/* list member to link to list of registered services with rpc_clnt */
struct list_head program;
+
+ /* Needed for passing back in cb_actor */
+ void *mydata;
} rpcclnt_cb_program_t;
@@ -232,12 +235,12 @@ void rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config);
* procedure handlers.
*/
int rpcclnt_cbk_program_register (struct rpc_clnt *svc,
- rpcclnt_cb_program_t *program);
-
-int
-rpc_clnt_transport_unix_options_build (dict_t **options, char *filepath);
+ rpcclnt_cb_program_t *program, void *mydata);
void
rpc_clnt_disable (struct rpc_clnt *rpc);
+char
+rpc_clnt_is_disabled (struct rpc_clnt *rpc);
+
#endif /* !_RPC_CLNT_H */
diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c
new file mode 100644
index 000000000..8181e6aee
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.c
@@ -0,0 +1,872 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc.h"
+#ifndef RPC_DRC_H
+#include "rpc-drc.h"
+#endif
+#include "locking.h"
+#include "hashfn.h"
+#include "common-utils.h"
+#include "statedump.h"
+#include "mem-pool.h"
+
+#include <netinet/in.h>
+#include <unistd.h>
+
+/**
+ * rpcsvc_drc_op_destroy - release a cached reply and what it holds
+ *
+ * A reply whose state is DRC_OP_IN_TRANSIT is left untouched. Otherwise
+ * the iobref reference is dropped, the rpchdr/proghdr/progpayload vectors
+ * are freed when set, the op is unlinked from the global list, the
+ * per-client and global op counters are decremented and the op is handed
+ * back with mem_put.
+ *
+ * @param drc - the main drc structure
+ * @param reply - the cached reply to destroy
+ * @return reply when it is still in transit, NULL once it is destroyed
+ */
+static drc_cached_op_t *
+rpcsvc_drc_op_destroy (rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+        rpc_transport_msg_t    *msg = NULL;
+
+        GF_ASSERT (drc);
+        GF_ASSERT (reply);
+
+        if (reply->state != DRC_OP_IN_TRANSIT) {
+                msg = &reply->msg;
+
+                iobref_unref (msg->iobref);
+                if (msg->rpchdr)
+                        GF_FREE (msg->rpchdr);
+                if (msg->proghdr)
+                        GF_FREE (msg->proghdr);
+                if (msg->progpayload)
+                        GF_FREE (msg->progpayload);
+
+                list_del (&reply->global_list);
+                reply->client->op_count--;
+                drc->op_count--;
+
+                mem_put (reply);
+                reply = NULL;
+        }
+
+        return reply;
+}
+
+/**
+ * rpcsvc_drc_rb_op_destroy - item destructor handed to rb_destroy
+ *
+ * Adapts the untyped callback arguments and forwards them to
+ * rpcsvc_drc_op_destroy; the value that function returns is discarded.
+ *
+ * @param reply - the cached reply stored in the rb tree
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_drc_rb_op_destroy (void *reply, void *drc)
+{
+        rpcsvc_drc_globals_t   *globals = drc;
+        drc_cached_op_t        *op      = reply;
+
+        (void) rpcsvc_drc_op_destroy (globals, op);
+}
+
+/**
+ * rpcsvc_remove_drc_client - Cleanup the drc client
+ *
+ * Destroys the client's rb tree (rpcsvc_drc_rb_op_destroy is handed to
+ * rb_destroy as the item destructor), unlinks the client from the list it
+ * is on and frees the client structure. The drc client_count is not
+ * adjusted here; rpcsvc_drc_client_unref does that before calling this.
+ *
+ * @param client - the drc client to be removed
+ * @return void
+ */
+static void
+rpcsvc_remove_drc_client (drc_client_t *client)
+{
+ rb_destroy (client->rbtree, rpcsvc_drc_rb_op_destroy);
+ list_del (&client->client_list);
+ GF_FREE (client);
+}
+
+/**
+ * rpcsvc_client_lookup - find the drc client that owns a network address
+ *
+ * Walks drc->clients_head and compares every client's sock_union with the
+ * given address using gf_sock_union_equal_addr.
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - the network address of the client to be looked up
+ * @return the first matching drc client, NULL when there is none
+ */
+static drc_client_t *
+rpcsvc_client_lookup (rpcsvc_drc_globals_t *drc,
+                      struct sockaddr_storage *sockaddr)
+{
+        drc_client_t    *cur   = NULL;
+        drc_client_t    *match = NULL;
+
+        GF_ASSERT (drc);
+        GF_ASSERT (sockaddr);
+
+        /* an empty list is simply not iterated, so no separate test */
+        list_for_each_entry (cur, &drc->clients_head, client_list) {
+                if (!gf_sock_union_equal_addr (&cur->sock_union,
+                                               (union gf_sock_union *)sockaddr))
+                        continue;
+
+                match = cur;
+                break;
+        }
+
+        return match;
+}
+
+/**
+ * drc_compare_reqs - Used by rbtree to determine if incoming req matches with
+ * an existing node(cached reply) in rbtree
+ *
+ * The keys are compared field by field (xid, prognum, progver, procnum) so
+ * that the result is a consistent three-way ordering. Subtracting the
+ * unsigned xids and truncating to int could report the wrong sign for
+ * distant xids, and returning a fixed 1 for "same xid, different program"
+ * made the relation depend on argument order.
+ *
+ * NOTE(review): rpcsvc_add_op_to_cache hands a drc_cached_op_t to rb_probe,
+ * which reaches this function as 'item' and is read here as an
+ * rpcsvc_request_t - confirm that this is intended.
+ *
+ * @param item - pointer to the incoming req
+ * @param rb_node_data - pointer to an rbtree node (cached reply)
+ * @param param - drc pointer - unused here, but used in *op_destroy
+ * @return 0 if req matches reply, a negative value if req orders before
+ * reply, a positive value otherwise
+ */
+int
+drc_compare_reqs (const void *item, const void *rb_node_data, void *param)
+{
+        const rpcsvc_request_t  *req   = NULL;
+        const drc_cached_op_t   *reply = NULL;
+
+        GF_ASSERT (item);
+        GF_ASSERT (rb_node_data);
+        GF_ASSERT (param);
+
+        req = item;
+        reply = rb_node_data;
+
+        if (req->xid != reply->xid)
+                return (req->xid < reply->xid) ? -1 : 1;
+
+        if (req->prognum != reply->prognum)
+                return (req->prognum < reply->prognum) ? -1 : 1;
+
+        if (req->progver != reply->progversion)
+                return (req->progver < reply->progversion) ? -1 : 1;
+
+        if (req->procnum != reply->procnum)
+                return (req->procnum < reply->procnum) ? -1 : 1;
+
+        return 0;
+}
+
+/**
+ * drc_rb_calloc - node allocation hook installed in the rbtree allocator
+ *
+ * @param allocator - the libavl_allocator structure used by rbtree; it is
+ * the first member of rpcsvc_drc_globals_t, so the same
+ * pointer also addresses the enclosing drc structure
+ * @param size - not needed by this function
+ * @return whatever mem_get returns for the drc mem-pool
+ */
+static void *
+drc_rb_calloc (struct libavl_allocator *allocator, size_t size)
+{
+        struct mem_pool *pool = ((rpcsvc_drc_globals_t *)allocator)->mempool;
+
+        return mem_get (pool);
+}
+
+/**
+ * drc_rb_free - used by rbtree api to free a node
+ *
+ * Hands the block back with mem_put; the allocator argument is not used.
+ *
+ * @param a - the libavl_allocator structure used by rbtree api
+ * @param block - node that needs to be freed
+ * @return void
+ */
+static void
+drc_rb_free (struct libavl_allocator *a, void *block)
+{
+ mem_put (block);
+}
+
+/**
+ * drc_init_client_cache - set up the rb tree of a drc client
+ *
+ * Installs drc_rb_calloc/drc_rb_free in the allocator embedded in drc and
+ * creates the client's tree with drc_compare_reqs as comparator and drc as
+ * the comparator parameter.
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to be initialized
+ * @return 0 on success, -1 when the tree could not be created
+ */
+static int
+drc_init_client_cache (rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+        struct libavl_allocator *alloc = NULL;
+
+        GF_ASSERT (drc);
+        GF_ASSERT (client);
+
+        /* allocator is the first member of drc, same address as drc */
+        alloc = &drc->allocator;
+        alloc->libavl_malloc = drc_rb_calloc;
+        alloc->libavl_free = drc_rb_free;
+
+        client->rbtree = rb_create (drc_compare_reqs, drc, alloc);
+        if (client->rbtree)
+                return 0;
+
+        gf_log (GF_RPCSVC, GF_LOG_DEBUG, "rb tree creation failed");
+        return -1;
+}
+
+/**
+ * rpcsvc_get_drc_client - return the drc client for an address, creating
+ * it when it is not known yet
+ *
+ * A newly created client starts with ref and op_count at 0, gets its own
+ * rb tree, is added to drc->clients_head and bumps drc->client_count.
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - network address of client
+ * @return drc client on success, NULL on allocation or tree-creation failure
+ */
+static drc_client_t *
+rpcsvc_get_drc_client (rpcsvc_drc_globals_t *drc,
+                       struct sockaddr_storage *sockaddr)
+{
+        drc_client_t    *known = NULL;
+        drc_client_t    *fresh = NULL;
+
+        GF_ASSERT (drc);
+        GF_ASSERT (sockaddr);
+
+        known = rpcsvc_client_lookup (drc, sockaddr);
+        if (known)
+                return known;
+
+        /* not seen before: build a cache for this client */
+        fresh = GF_CALLOC (1, sizeof (*fresh), gf_common_mt_drc_client_t);
+        if (!fresh)
+                return NULL;
+
+        fresh->ref = 0;
+        fresh->op_count = 0;
+        fresh->sock_union = (union gf_sock_union)*sockaddr;
+
+        if (drc_init_client_cache (drc, fresh)) {
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG,
+                        "initialization of drc client failed");
+                GF_FREE (fresh);
+                return NULL;
+        }
+
+        drc->client_count++;
+        list_add (&fresh->client_list, &drc->clients_head);
+
+        return fresh;
+}
+
+/**
+ * rpcsvc_need_drc - tell whether a request must go through the DRC
+ *
+ * The answer is yes only when the service has an initiated drc, the
+ * request resolves to an actor, that actor is DRC_NON_IDEMPOTENT and the
+ * drc type is not DRC_TYPE_NONE.
+ *
+ * @param req - incoming request
+ * @return 1 if DRC is needed for req, 0 otherwise
+ */
+int
+rpcsvc_need_drc (rpcsvc_request_t *req)
+{
+        rpcsvc_drc_globals_t    *drc    = NULL;
+        rpcsvc_actor_t          *actor  = NULL;
+        int                      needed = 0;
+
+        GF_ASSERT (req);
+        GF_ASSERT (req->svc);
+
+        drc = req->svc->drc;
+        if (drc && drc->status != DRC_UNINITIATED) {
+                actor = rpcsvc_program_actor (req);
+                if (actor)
+                        needed = (actor->op_type == DRC_NON_IDEMPOTENT
+                                  && drc->type != DRC_TYPE_NONE);
+        }
+
+        return needed;
+}
+
+/**
+ * rpcsvc_drc_client_ref - ref the drc client
+ *
+ * Increments client->ref with a plain (non-atomic) ++ and hands the same
+ * pointer back so the call can be used in an assignment.
+ * NOTE(review): presumably callers serialize on drc->lock - confirm.
+ *
+ * @param client - the drc client to ref
+ * @return client
+ */
+static drc_client_t *
+rpcsvc_drc_client_ref (drc_client_t *client)
+{
+ GF_ASSERT (client);
+ client->ref++;
+ return client;
+}
+
+/**
+ * rpcsvc_drc_client_unref - drop one reference on the drc client
+ *
+ * When the count reaches zero the drc client_count is decremented and the
+ * client is removed with rpcsvc_remove_drc_client.
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to unref
+ * @return NULL if it is the last unref, client otherwise
+ */
+static drc_client_t *
+rpcsvc_drc_client_unref (rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+        GF_ASSERT (drc);
+        GF_ASSERT (client->ref);
+
+        if (--client->ref != 0)
+                return client;
+
+        /* last reference is gone */
+        drc->client_count--;
+        rpcsvc_remove_drc_client (client);
+
+        return NULL;
+}
+
+/**
+ * rpcsvc_drc_lookup - lookup a request to see if it is already cached
+ *
+ * If the transport has no drc client yet, one is obtained for the peer
+ * address and stored in req->trans->drc_client together with a reference
+ * of its own. Without that reference the temporary ref/unref pair below
+ * would drop a freshly created client (ref 0) back to zero and destroy it
+ * while the transport still points at it.
+ *
+ * NOTE(review): no lock is taken in this function; presumably the caller
+ * holds drc->lock - confirm.
+ *
+ * @param req - incoming request
+ * @return cached reply of req if found, NULL otherwise
+ */
+drc_cached_op_t *
+rpcsvc_drc_lookup (rpcsvc_request_t *req)
+{
+        drc_client_t           *client = NULL;
+        drc_cached_op_t        *reply  = NULL;
+
+        GF_ASSERT (req);
+
+        if (!req->trans->drc_client) {
+                client = rpcsvc_get_drc_client (req->svc->drc,
+                                                &req->trans->peerinfo.sockaddr);
+                if (!client)
+                        goto out;
+
+                /* the transport keeps its own reference on the client */
+                req->trans->drc_client = rpcsvc_drc_client_ref (client);
+        }
+
+        /* temporary reference for the duration of the lookup */
+        client = rpcsvc_drc_client_ref (req->trans->drc_client);
+
+        if (client->op_count == 0)
+                goto out;
+
+        reply = rb_find (client->rbtree, req);
+
+ out:
+        if (client)
+                rpcsvc_drc_client_unref (req->svc->drc, client);
+
+        return reply;
+}
+
+/**
+ * rpcsvc_send_cached_reply - send the cached reply for the incoming request
+ *
+ * Resubmits the header, program header, payload vectors and iobref stored
+ * in reply->msg on the transport of the duplicate request. A reference on
+ * reply->client is held around the submission.
+ *
+ * @param req - incoming request (which is a duplicate in this case)
+ * @param reply - the cached reply for req
+ * @return the value returned by rpcsvc_transport_submit (documented here as
+ * 0 on successful reply submission, -1 or other non-zero value otherwise)
+ */
+int
+rpcsvc_send_cached_reply (rpcsvc_request_t *req, drc_cached_op_t *reply)
+{
+ int ret = 0;
+
+ GF_ASSERT (req);
+ GF_ASSERT (reply);
+
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "sending cached reply: xid: %d, "
+ "client: %s", req->xid, req->trans->peerinfo.identifier);
+
+ rpcsvc_drc_client_ref (reply->client);
+ ret = rpcsvc_transport_submit (req->trans,
+ reply->msg.rpchdr, reply->msg.rpchdrcount,
+ reply->msg.proghdr, reply->msg.proghdrcount,
+ reply->msg.progpayload, reply->msg.progpayloadcount,
+ reply->msg.iobref, req->trans_private);
+ rpcsvc_drc_client_unref (req->svc->drc, reply->client);
+
+ return ret;
+}
+
+/**
+ * rpcsvc_cache_reply - cache the reply for the processed request 'req'
+ *
+ * Marks req->reply as DRC_OP_CACHED, takes a reference on iobref and stores
+ * iov_dup copies of the rpc header and program header in it. The payload is
+ * duplicated only when payloadcount is non-zero.
+ * NOTE(review): the results of iov_dup are not checked here.
+ *
+ * @param req - processed request
+ * @param iobref - iobref structure of the reply
+ * @param rpchdr - rpc header of the reply
+ * @param rpchdrcount - size of rpchdr
+ * @param proghdr - program header of the reply
+ * @param proghdrcount - size of proghdr
+ * @param payload - payload of the reply if any
+ * @param payloadcount - size of payload
+ * @return 0; no failure path is implemented in this function at present
+ */
+int
+rpcsvc_cache_reply (rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount,
+ struct iovec *proghdr, int proghdrcount,
+ struct iovec *payload, int payloadcount)
+{
+ int ret = -1;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT (req);
+ GF_ASSERT (req->reply);
+
+ reply = req->reply;
+
+ reply->state = DRC_OP_CACHED;
+
+ reply->msg.iobref = iobref_ref (iobref);
+
+ reply->msg.rpchdrcount = rpchdrcount;
+ reply->msg.rpchdr = iov_dup (rpchdr, rpchdrcount);
+
+ reply->msg.proghdrcount = proghdrcount;
+ reply->msg.proghdr = iov_dup (proghdr, proghdrcount);
+
+ reply->msg.progpayloadcount = payloadcount;
+ if (payloadcount)
+ reply->msg.progpayload = iov_dup (payload, payloadcount);
+
+ // rpcsvc_drc_client_unref (req->svc->drc, req->trans->drc_client);
+ // rpcsvc_drc_op_unref (req->svc->drc, reply);
+ ret = 0;
+
+ return ret;
+}
+
+/**
+ * rpcsvc_vacate_drc_entries - free up some percentage of drc cache
+ * based on the lru factor
+ *
+ * Walks drc->cache_head from the tail and evicts up to
+ * global_cache_size / lru_factor entries, skipping ops that are still in
+ * transit. At least one eligible entry is evicted even when that quotient
+ * is zero. A zero lru_factor is treated like a zero quotient instead of
+ * being used as a divisor.
+ *
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_vacate_drc_entries (rpcsvc_drc_globals_t *drc)
+{
+        uint32_t                i      = 0;
+        uint32_t                n      = 0;
+        drc_cached_op_t        *reply  = NULL;
+        drc_cached_op_t        *tmp    = NULL;
+        drc_client_t           *client = NULL;
+
+        GF_ASSERT (drc);
+
+        /* lru_factor is filled from configuration; never divide by zero */
+        if (drc->lru_factor != 0)
+                n = drc->global_cache_size / drc->lru_factor;
+
+        list_for_each_entry_safe_reverse (reply, tmp, &drc->cache_head, global_list) {
+                /* Don't delete ops that are in transit */
+                if (reply->state == DRC_OP_IN_TRANSIT)
+                        continue;
+
+                /* remember the owner: reply is gone after op_destroy */
+                client = reply->client;
+
+                (void) rb_delete (client->rbtree, reply);
+
+                rpcsvc_drc_op_destroy (drc, reply);
+                rpcsvc_drc_client_unref (drc, client);
+                i++;
+                if (i >= n)
+                        break;
+        }
+}
+
+/**
+ * rpcsvc_add_op_to_cache - insert the cached op into the client rbtree and drc list
+ *
+ * When the global op count has reached global_cache_size, entries are
+ * evicted first. The result of rb_probe is checked for NULL before it is
+ * dereferenced; a NULL result is treated as an allocation failure and a
+ * result pointing at a different op as an undetected duplicate.
+ *
+ * @param drc - the main drc structure
+ * @param reply - the op to be inserted
+ * @return 0 on success, -1 on failure
+ */
+static int
+rpcsvc_add_op_to_cache (rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+        drc_client_t           *client    = NULL;
+        drc_cached_op_t       **tmp_reply = NULL;
+
+        GF_ASSERT (drc);
+        GF_ASSERT (reply);
+
+        client = reply->client;
+
+        /* cache is full, free up some space */
+        if (drc->op_count >= drc->global_cache_size)
+                rpcsvc_vacate_drc_entries (drc);
+
+        tmp_reply = (drc_cached_op_t **)rb_probe (client->rbtree, reply);
+        if (tmp_reply == NULL) {
+                /* mem alloc failed */
+                return -1;
+        }
+
+        if (*tmp_reply != reply) {
+                /* should never happen */
+                gf_log (GF_RPCSVC, GF_LOG_ERROR,
+                        "DRC failed to detect duplicates");
+                return -1;
+        }
+
+        client->op_count++;
+        list_add (&reply->global_list, &drc->cache_head);
+        drc->op_count++;
+
+        return 0;
+}
+
+/**
+ * rpcsvc_cache_request - cache the in-transition incoming request
+ *
+ * Allocates an op from the drc mem-pool, clears it, fills in the request
+ * identity (xid, program number/version, procedure), marks it
+ * DRC_OP_IN_TRANSIT and inserts it into the cache. The op is cleared so
+ * that the msg pointers tested later by rpcsvc_drc_op_destroy never hold
+ * stale values. On insertion failure the op is handed straight back with
+ * mem_put: rpcsvc_drc_op_destroy returns early for in-transit ops and
+ * would leave it allocated.
+ *
+ * @param req - incoming request
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_cache_request (rpcsvc_request_t *req)
+{
+        int                     ret    = -1;
+        drc_client_t           *client = NULL;
+        drc_cached_op_t        *reply  = NULL;
+        rpcsvc_drc_globals_t   *drc    = NULL;
+
+        GF_ASSERT (req);
+
+        drc = req->svc->drc;
+
+        client = req->trans->drc_client;
+        if (!client) {
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc client is NULL");
+                goto out;
+        }
+
+        reply = mem_get (drc->mempool);
+        if (!reply)
+                goto out;
+
+        /* start from a known state; only some fields are assigned below */
+        memset (reply, 0, sizeof (*reply));
+
+        reply->client = rpcsvc_drc_client_ref (client);
+        reply->xid = req->xid;
+        reply->prognum = req->prognum;
+        reply->progversion = req->progver;
+        reply->procnum = req->procnum;
+        reply->state = DRC_OP_IN_TRANSIT;
+        req->reply = reply;
+
+        ret = rpcsvc_add_op_to_cache (drc, reply);
+        if (ret) {
+                /* the op was never linked or counted: just give it back */
+                req->reply = NULL;
+                mem_put (reply);
+                rpcsvc_drc_client_unref (drc, client);
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Failed to add op to drc cache");
+        }
+
+ out:
+        return ret;
+}
+
+/**
+ *
+ * rpcsvc_drc_priv - function which dumps the drc state
+ *
+ * Writes the global drc settings and counters and then, for every known
+ * client, its address, reference count and cached-op count. The 64-bit hit
+ * counters are printed with a 64-bit conversion and the uint32_t counters
+ * as unsigned. A failed inet_ntop is reported as "N/A" rather than passing
+ * a NULL pointer to "%s".
+ *
+ * @param drc - the main drc structure
+ * @return 0 on success or when drc is not initialized, -1 when the drc
+ * lock could not be taken
+ */
+int32_t
+rpcsvc_drc_priv (rpcsvc_drc_globals_t *drc)
+{
+        int                     i      = 0;
+        char                    key[GF_DUMP_MAX_BUF_LEN] = {0};
+        drc_client_t           *client = NULL;
+        char                    ip[INET6_ADDRSTRLEN] = {0};
+        const char             *addr   = NULL;
+
+        if (!drc || drc->status == DRC_UNINITIATED) {
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "DRC is "
+                        "uninitialized, not dumping its state");
+                return 0;
+        }
+
+        gf_proc_dump_add_section("rpc.drc");
+
+        if (TRY_LOCK (&drc->lock))
+                return -1;
+
+        gf_proc_dump_build_key (key, "drc", "type");
+        gf_proc_dump_write (key, "%d", drc->type);
+
+        gf_proc_dump_build_key (key, "drc", "client_count");
+        gf_proc_dump_write (key, "%u", drc->client_count);
+
+        gf_proc_dump_build_key (key, "drc", "current_cache_size");
+        gf_proc_dump_write (key, "%u", drc->op_count);
+
+        gf_proc_dump_build_key (key, "drc", "max_cache_size");
+        gf_proc_dump_write (key, "%u", drc->global_cache_size);
+
+        gf_proc_dump_build_key (key, "drc", "lru_factor");
+        gf_proc_dump_write (key, "%d", drc->lru_factor);
+
+        /* the hit counters are uint64_t: widen explicitly for %llu */
+        gf_proc_dump_build_key (key, "drc", "duplicate_request_count");
+        gf_proc_dump_write (key, "%llu",
+                            (unsigned long long) drc->cache_hits);
+
+        gf_proc_dump_build_key (key, "drc", "in_transit_duplicate_requests");
+        gf_proc_dump_write (key, "%llu",
+                            (unsigned long long) drc->intransit_hits);
+
+        list_for_each_entry (client, &drc->clients_head, client_list) {
+                gf_proc_dump_build_key (key, "client", "%d.ip-address", i);
+                memset (ip, 0, INET6_ADDRSTRLEN);
+                switch (client->sock_union.storage.ss_family) {
+                case AF_INET:
+                        addr = inet_ntop (AF_INET,
+                                &client->sock_union.sin.sin_addr.s_addr,
+                                ip, INET_ADDRSTRLEN);
+                        gf_proc_dump_write (key, "%s", addr ? addr : "N/A");
+                        break;
+                case AF_INET6:
+                        addr = inet_ntop (AF_INET6,
+                                &client->sock_union.sin6.sin6_addr,
+                                ip, INET6_ADDRSTRLEN);
+                        gf_proc_dump_write (key, "%s", addr ? addr : "N/A");
+                        break;
+                default:
+                        gf_proc_dump_write (key, "%s", "N/A");
+                        break;
+                }
+
+                gf_proc_dump_build_key (key, "client", "%d.ref_count", i);
+                gf_proc_dump_write (key, "%u", client->ref);
+                gf_proc_dump_build_key (key, "client", "%d.op_count", i);
+                gf_proc_dump_write (key, "%u", client->op_count);
+                i++;
+        }
+
+        UNLOCK (&drc->lock);
+        return 0;
+}
+
+/**
+ * rpcsvc_drc_notify - function which is notified of RPC transport events
+ *
+ * Does nothing (returns 0) while the drc is uninitiated or its type is
+ * DRC_TYPE_NONE. Otherwise, under drc->lock, the drc client for the
+ * transport's peer address is looked up (or created) and:
+ * - on RPCSVC_EVENT_ACCEPT a reference is taken and stored in
+ * trans->drc_client;
+ * - on RPCSVC_EVENT_DISCONNECT one reference is dropped and
+ * trans->drc_client is cleared, unless the client list is empty.
+ * Any other event leaves ret at -1.
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param xl - pointer to the xlator (not used in this function)
+ * @param event - the event which triggered this notify
+ * @param data - the transport structure
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_drc_notify (rpcsvc_t *svc, void *xl,
+ rpcsvc_event_t event, void *data)
+{
+ int ret = -1;
+ rpc_transport_t *trans = NULL;
+ drc_client_t *client = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT (svc);
+ GF_ASSERT (svc->drc);
+ GF_ASSERT (data);
+
+ drc = svc->drc;
+
+ if (drc->status == DRC_UNINITIATED ||
+ drc->type == DRC_TYPE_NONE)
+ return 0;
+
+ LOCK (&drc->lock);
+
+ trans = (rpc_transport_t *)data;
+ client = rpcsvc_get_drc_client (drc, &trans->peerinfo.sockaddr);
+ if (!client)
+ goto out;
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ trans->drc_client = rpcsvc_drc_client_ref (client);
+ ret = 0;
+ break;
+
+ case RPCSVC_EVENT_DISCONNECT:
+ ret = 0;
+ if (list_empty (&drc->clients_head))
+ break;
+ /* should be the last unref */
+ rpcsvc_drc_client_unref (drc, client);
+ trans->drc_client = NULL;
+ break;
+
+ default:
+ break;
+ }
+
+ out:
+ UNLOCK (&drc->lock);
+ return ret;
+}
+
+/**
+ * rpcsvc_drc_init - Initialize the duplicate request cache service
+ *
+ * Returns at once when a previous call already completed (tracked by a
+ * function-local static flag). Otherwise the drc structure is allocated if
+ * svc->drc is not set yet and, under drc->lock, configured from these
+ * options:
+ * nfs.drc - on/off switch, treated as on when it cannot be read
+ * nfs.drc-type - cache type, DRC_DEFAULT_TYPE when absent
+ * nfs.drc-size - number of ops, DRC_DEFAULT_CACHE_SIZE when absent
+ * nfs.drc-lru-factor - eviction factor, DRC_DEFAULT_LRU_FACTOR when absent
+ * A mem-pool for cached ops is created, the client and cache lists are
+ * initialized and rpcsvc_drc_notify is registered with the service.
+ * When ret is -1 at the end, the mem-pool and the drc structure are freed
+ * and svc->drc is reset to NULL.
+ * NOTE(review): a non-zero ret other than -1 (for instance from
+ * rpcsvc_register_notify, if it can return one) skips that cleanup - confirm.
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param options - the options dictionary which configures drc
+ * @return 0 on success, non-zero integer on failure
+ */
+int
+rpcsvc_drc_init (rpcsvc_t *svc, dict_t *options)
+{
+ int ret = 0;
+ uint32_t drc_type = 0;
+ uint32_t drc_size = 0;
+ uint32_t drc_factor = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
+ static gf_boolean_t drc_inited = _gf_false;
+
+ GF_ASSERT (svc);
+ GF_ASSERT (options);
+
+ /* Already inited */
+ if (drc_inited)
+ return 0;
+
+ if (!svc->drc) {
+ drc = GF_CALLOC (1, sizeof (rpcsvc_drc_globals_t),
+ gf_common_mt_drc_globals_t);
+ if (!drc)
+ return -1;
+
+ svc->drc = drc;
+ LOCK_INIT (&drc->lock);
+ } else {
+ drc = svc->drc;
+ }
+
+ LOCK (&drc->lock);
+ if (drc->type != DRC_TYPE_NONE) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Toggle DRC on/off, when more drc types(persistent/cluster)
+ are added, we shouldn't treat this as boolean */
+ ret = dict_get_str_boolean (options, "nfs.drc", _gf_true);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_INFO, "drc user options need second look");
+ ret = _gf_true;
+ }
+ drc->enable_drc = ret;
+
+ if (ret == _gf_false) {
+ /* drc off */
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "DRC is off");
+ ret = 0;
+ goto out;
+ }
+
+ /* Specify type of DRC to be used */
+ ret = dict_get_uint32 (options, "nfs.drc-type", &drc_type);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc type not set."
+ " Continuing with default");
+ drc_type = DRC_DEFAULT_TYPE;
+ }
+
+ drc->type = drc_type;
+
+ /* Set the global cache size (no. of ops to cache) */
+ ret = dict_get_uint32 (options, "nfs.drc-size", &drc_size);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc size not set."
+ " Continuing with default size");
+ drc_size = DRC_DEFAULT_CACHE_SIZE;
+ }
+
+ drc->global_cache_size = drc_size;
+
+ /* Mempool for cached ops */
+ drc->mempool = mem_pool_new (drc_cached_op_t, drc->global_cache_size);
+ if (!drc->mempool) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get mempool for"
+ " DRC, drc-size: %d", drc->global_cache_size);
+ ret = -1;
+ goto out;
+ }
+
+ /* What percent of cache to be evicted whenever it fills up */
+ ret = dict_get_uint32 (options, "nfs.drc-lru-factor", &drc_factor);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc lru factor not set."
+ " Continuing with policy default");
+ drc_factor = DRC_DEFAULT_LRU_FACTOR;
+ }
+
+ drc->lru_factor = (drc_lru_factor_t) drc_factor;
+
+ INIT_LIST_HEAD (&drc->clients_head);
+ INIT_LIST_HEAD (&drc->cache_head);
+
+ ret = rpcsvc_register_notify (svc, rpcsvc_drc_notify, THIS);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "registration of drc_notify function failed");
+ goto out;
+ }
+
+ gf_log (GF_RPCSVC, GF_LOG_DEBUG, "drc init successful");
+ drc->status = DRC_INITIATED;
+ drc_inited = _gf_true;
+
+ out:
+ UNLOCK (&drc->lock);
+ if (ret == -1) {
+ if (drc->mempool) {
+ mem_pool_destroy (drc->mempool);
+ drc->mempool = NULL;
+ }
+ GF_FREE (drc);
+ svc->drc = NULL;
+ }
+ return ret;
+}
+
+/**
+ * rpcsvc_drc_reconfigure - apply changed drc options to a running service
+ *
+ * Only the nfs.drc on/off switch can be changed at run time; a different
+ * nfs.drc-size is rejected. svc->drc is checked for NULL before it is
+ * dereferenced: without a drc structure, enabling goes through
+ * rpcsvc_drc_init and disabling is a no-op.
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param options - the options dictionary carrying the new settings
+ * @return 0 on success, -1 on invalid arguments or an attempted size
+ * change; when initialization is needed, the value returned by
+ * rpcsvc_drc_init
+ */
+int
+rpcsvc_drc_reconfigure (rpcsvc_t *svc, dict_t *options)
+{
+        int                     ret        = -1;
+        gf_boolean_t            enable_drc = _gf_false;
+        rpcsvc_drc_globals_t   *drc        = NULL;
+        uint32_t                drc_size   = 0;
+
+        if ((!svc) || (!options))
+                return (-1);
+
+        /* reconfig for nfs.drc */
+        ret = dict_get_str_boolean (options, "nfs.drc", _gf_true);
+        if (ret < 0) {
+                ret = _gf_true;
+        }
+        enable_drc = ret;
+
+        drc = svc->drc;
+        if (drc == NULL) {
+                /* nothing to compare against or tear down */
+                if (enable_drc)
+                        return rpcsvc_drc_init(svc, options);
+                return (0);
+        }
+
+        /* reconfig for drc-size */
+        if (dict_get_uint32 (options, "nfs.drc-size", &drc_size))
+                drc_size = DRC_DEFAULT_CACHE_SIZE;
+
+        if (drc->global_cache_size != drc_size) {
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "nfs.drc-size size can not "
+                        "be reconfigured without NFS server restart.");
+                return (-1);
+        }
+
+        if (drc->enable_drc == enable_drc)
+                return 0;
+
+        drc->enable_drc = enable_drc;
+        if (!enable_drc) {
+                LOCK (&drc->lock);
+                (void) rpcsvc_unregister_notify (svc, rpcsvc_drc_notify, THIS);
+                if (drc->mempool) {
+                        mem_pool_destroy (drc->mempool);
+                        drc->mempool = NULL;
+                }
+                UNLOCK (&drc->lock);
+                GF_FREE (drc);
+                svc->drc = NULL;
+        }
+
+        return (0);
+}
diff --git a/rpc/rpc-lib/src/rpc-drc.h b/rpc/rpc-lib/src/rpc-drc.h
new file mode 100644
index 000000000..7dfaef978
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.h
@@ -0,0 +1,104 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef RPC_DRC_H
+#define RPC_DRC_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "rpcsvc-common.h"
+#include "rpcsvc.h"
+#include "locking.h"
+#include "dict.h"
+#include "rb.h"
+
+/* per-client cache structure */
+struct drc_client {
+ /* reference count -- presumably one per holder of this client entry;
+ * TODO confirm against rpc-drc.c */
+ uint32_t ref;
+ /* NOTE(review): looks like the client's socket address, used to tell
+ * clients apart -- confirm against rpc-drc.c */
+ union gf_sock_union sock_union;
+ /* pointers to the cache */
+ struct rb_table *rbtree;
+ /* no. of ops currently cached */
+ uint32_t op_count;
+ /* list linkage -- presumably into drc_globals.clients_head; verify */
+ struct list_head client_list;
+};
+
+/* One cache entry: a request seen by the server and, once available,
+ * its reply. */
+struct drc_cached_op {
+ /* DRC_OP_IN_TRANSIT or DRC_OP_CACHED (see enum drc_op_state) */
+ drc_op_state_t state;
+ /* RPC transaction id and program/version/procedure numbers --
+ * presumably copied from the request to identify it; verify */
+ uint32_t xid;
+ int prognum;
+ int progversion;
+ int procnum;
+ /* NOTE(review): looks like the stored reply message -- confirm */
+ rpc_transport_msg_t msg;
+ /* client this op belongs to */
+ drc_client_t *client;
+ /* list linkages -- presumably per-client and global (cache_head)
+ * lists respectively; verify against rpc-drc.c */
+ struct list_head client_list;
+ struct list_head global_list;
+ /* reference count */
+ int32_t ref;
+};
+
+/* global drc definitions */
+/* Initialisation state of the global DRC; rpcsvc_drc_init() sets
+ * DRC_INITIATED once initialisation has succeeded. */
+enum drc_status {
+ DRC_UNINITIATED,
+ DRC_INITIATED
+};
+typedef enum drc_status drc_status_t;
+
+/* Global (per rpcsvc_t) state of the duplicate request cache; reachable
+ * through rpcsvc_t.drc. */
+struct drc_globals {
+ /* allocator must be the first member since
+ * it is used so in gf_libavl_allocator
+ */
+ struct libavl_allocator allocator;
+ drc_type_t type;
+ /* configurable size parameter */
+ /* compared against "nfs.drc-size" on reconfigure; a different value
+ * is rejected there */
+ uint32_t global_cache_size;
+ drc_lru_factor_t lru_factor;
+ /* taken by callers around cache lookups/updates and teardown */
+ gf_lock_t lock;
+ drc_status_t status;
+ /* presumably the total number of cached ops; verify */
+ uint32_t op_count;
+ /* incremented when a cached reply is resent for a duplicate request */
+ uint64_t cache_hits;
+ /* incremented when a duplicate of a still in-transit request is
+ * dropped */
+ uint64_t intransit_hits;
+ struct mem_pool *mempool;
+ /* list heads, initialised in rpcsvc_drc_init() */
+ struct list_head cache_head;
+ uint32_t client_count;
+ struct list_head clients_head;
+ /* value of the "nfs.drc" option as last applied */
+ gf_boolean_t enable_drc;
+};
+
+int
+rpcsvc_need_drc (rpcsvc_request_t *req);
+
+drc_cached_op_t *
+rpcsvc_drc_lookup (rpcsvc_request_t *req);
+
+int
+rpcsvc_send_cached_reply (rpcsvc_request_t *req, drc_cached_op_t *reply);
+
+int
+rpcsvc_cache_reply (rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount,
+ struct iovec *proghdr, int proghdrcount,
+ struct iovec *payload, int payloadcount);
+
+int
+rpcsvc_cache_request (rpcsvc_request_t *req);
+
+int32_t
+rpcsvc_drc_priv (rpcsvc_drc_globals_t *drc);
+
+int
+rpcsvc_drc_init (rpcsvc_t *svc, dict_t *options);
+
+int
+rpcsvc_drc_reconfigure (rpcsvc_t *svc, dict_t *options);
+
+#endif /* RPC_DRC_H */
diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index e07bf0d76..c24d41084 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -69,6 +69,19 @@ out:
return ret;
}
+/*
+ * Forward a throttle on/off request to the transport implementation.
+ *
+ * Returns -ENOSYS when the transport provides no throttle operation,
+ * otherwise whatever the transport's throttle operation returns.
+ */
+int
+rpc_transport_throttle (rpc_transport_t *this, gf_boolean_t onoff)
+{
+        if (this->ops->throttle == NULL)
+                return -ENOSYS;
+
+        return this->ops->throttle (this, onoff);
+}
+
int32_t
rpc_transport_get_peeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
struct sockaddr_storage *sa, size_t salen)
@@ -145,6 +158,7 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
int8_t is_tcp = 0, is_unix = 0, is_ibsdp = 0;
volume_opt_list_t *vol_opt = NULL;
gf_boolean_t bind_insecure = _gf_false;
+ xlator_t *this = NULL;
GF_VALIDATE_OR_GOTO("rpc-transport", options, fail);
GF_VALIDATE_OR_GOTO("rpc-transport", ctx, fail);
@@ -169,7 +183,7 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
gf_log ("dict", GF_LOG_DEBUG,
"setting transport-type failed");
else
- gf_log ("rpc-transport", GF_LOG_WARNING,
+ gf_log ("rpc-transport", GF_LOG_DEBUG,
"missing 'option transport-type'. defaulting to "
"\"socket\"");
} else {
@@ -250,13 +264,15 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
if (handle == NULL) {
gf_log ("rpc-transport", GF_LOG_ERROR, "%s", dlerror ());
- gf_log ("rpc-transport", GF_LOG_ERROR,
+ gf_log ("rpc-transport", GF_LOG_WARNING,
"volume '%s': transport-type '%s' is not valid or "
"not found on this machine",
trans_name, type);
goto fail;
}
+ trans->dl_handle = handle;
+
trans->ops = dlsym (handle, "tops");
if (trans->ops == NULL) {
gf_log ("rpc-transport", GF_LOG_ERROR,
@@ -264,22 +280,22 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
goto fail;
}
- trans->init = dlsym (handle, "init");
+ *VOID(&(trans->init)) = dlsym (handle, "init");
if (trans->init == NULL) {
gf_log ("rpc-transport", GF_LOG_ERROR,
"dlsym (gf_rpc_transport_init) on %s", dlerror ());
goto fail;
}
- trans->fini = dlsym (handle, "fini");
+ *VOID(&(trans->fini)) = dlsym (handle, "fini");
if (trans->fini == NULL) {
gf_log ("rpc-transport", GF_LOG_ERROR,
"dlsym (gf_rpc_transport_fini) on %s", dlerror ());
goto fail;
}
- trans->reconfigure = dlsym (handle, "reconfigure");
- if (trans->fini == NULL) {
+ *VOID(&(trans->reconfigure)) = dlsym (handle, "reconfigure");
+ if (trans->reconfigure == NULL) {
gf_log ("rpc-transport", GF_LOG_DEBUG,
"dlsym (gf_rpc_transport_reconfigure) on %s", dlerror());
}
@@ -290,14 +306,15 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
goto fail;
}
+ this = THIS;
vol_opt->given_opt = dlsym (handle, "options");
if (vol_opt->given_opt == NULL) {
gf_log ("rpc-transport", GF_LOG_DEBUG,
"volume option validation not specified");
} else {
INIT_LIST_HEAD (&vol_opt->list);
- list_add_tail (&vol_opt->list, &(THIS->volume_options));
- if (xlator_options_validate_list (THIS, options, vol_opt,
+ list_add_tail (&vol_opt->list, &(this->volume_options));
+ if (xlator_options_validate_list (this, options, vol_opt,
NULL)) {
gf_log ("rpc-transport", GF_LOG_ERROR,
"volume option validation failed");
@@ -308,7 +325,7 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
trans->options = options;
pthread_mutex_init (&trans->lock, NULL);
- trans->xl = THIS;
+ trans->xl = this;
ret = trans->init (trans);
if (ret != 0) {
@@ -319,23 +336,25 @@ rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
return_trans = trans;
- if (name) {
- GF_FREE (name);
- }
+ GF_FREE (name);
return return_trans;
fail:
if (trans) {
- if (trans->name) {
- GF_FREE (trans->name);
- }
+ GF_FREE (trans->name);
+
+ if (trans->dl_handle)
+ dlclose (trans->dl_handle);
GF_FREE (trans);
}
- if (name) {
- GF_FREE (name);
+ GF_FREE (name);
+
+ if (vol_opt && !list_empty (&vol_opt->list)) {
+ list_del_init (&vol_opt->list);
+ GF_FREE (vol_opt);
}
return NULL;
@@ -423,8 +442,10 @@ rpc_transport_destroy (rpc_transport_t *this)
pthread_mutex_destroy (&this->lock);
- if (this->name)
- GF_FREE (this->name);
+ GF_FREE (this->name);
+
+ if (this->dl_handle)
+ dlclose (this->dl_handle);
GF_FREE (this);
fail:
@@ -469,6 +490,8 @@ rpc_transport_unref (rpc_transport_t *this)
if (this->mydata)
this->notify (this, this->mydata, RPC_TRANSPORT_CLEANUP,
NULL);
+ this->mydata = NULL;
+ this->notify = NULL;
rpc_transport_destroy (this);
}
@@ -512,18 +535,6 @@ out:
}
-inline int
-rpc_transport_unregister_notify (rpc_transport_t *trans)
-{
- GF_VALIDATE_OR_GOTO ("rpc-transport", trans, out);
-
- trans->notify = NULL;
- trans->mydata = NULL;
-
-out:
- return 0;
-}
-
//give negative values to skip setting that value
//this function asserts if both the values are negative.
@@ -551,6 +562,63 @@ out:
}
+/*
+ * Build the option dictionary for a unix-domain-socket transport.
+ *
+ * @options:       out parameter; receives the new dictionary on success
+ * @filepath:      socket path, stored (as a private copy) under
+ *                 "transport.socket.connect-path"
+ * @frame_timeout: stored under "frame-timeout" only when greater than 0
+ *
+ * Returns 0 on success. On failure a non-zero value is returned, the
+ * partially built dictionary is released and *options is left untouched.
+ */
int
+rpc_transport_unix_options_build (dict_t **options, char *filepath,
+                                  int frame_timeout)
+{
+        dict_t                  *dict = NULL;
+        char                    *fpath = NULL;
+        int                     ret = -1;
+
+        GF_ASSERT (filepath);
+        GF_ASSERT (options);
+
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+
+        fpath = gf_strdup (filepath);
+        if (!fpath) {
+                ret = -1;
+                goto out;
+        }
+
+        /* NOTE(review): on failure fpath is still freed at 'out', as
+         * before; confirm dict_set_dynstr() does not release it itself
+         * on error.
+         */
+        ret = dict_set_dynstr (dict, "transport.socket.connect-path", fpath);
+        if (ret)
+                goto out;
+
+        /* The dictionary owns the string from here on. Drop our pointer so
+         * that a later failure does not free it a second time on top of
+         * dict_unref().
+         */
+        fpath = NULL;
+
+        ret = dict_set_str (dict, "transport.address-family", "unix");
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (dict, "transport.socket.nodelay", "off");
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (dict, "transport-type", "socket");
+        if (ret)
+                goto out;
+
+        ret = dict_set_str (dict, "transport.socket.keepalive", "off");
+        if (ret)
+                goto out;
+
+        if (frame_timeout > 0) {
+                ret = dict_set_int32 (dict, "frame-timeout", frame_timeout);
+                if (ret)
+                        goto out;
+        }
+
+        *options = dict;
+out:
+        if (ret) {
+                GF_FREE (fpath);
+                if (dict)
+                        dict_unref (dict);
+        }
+        return ret;
+}
+
+int
rpc_transport_inet_options_build (dict_t **options, const char *hostname,
int port)
{
@@ -585,7 +653,7 @@ rpc_transport_inet_options_build (dict_t **options, const char *hostname,
"failed to set remote-port with %d", port);
goto out;
}
- ret = dict_set_str (dict, "transport.address-family", "inet/inet6");
+ ret = dict_set_str (dict, "transport.address-family", "inet");
if (ret) {
gf_log (THIS->name, GF_LOG_WARNING,
"failed to set addr-family with inet");
@@ -602,8 +670,7 @@ rpc_transport_inet_options_build (dict_t **options, const char *hostname,
*options = dict;
out:
if (ret) {
- if (host)
- GF_FREE (host);
+ GF_FREE (host);
if (dict)
dict_unref (dict);
}
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index d9ab30dd8..2db9072ae 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -71,6 +71,11 @@ struct peer_info {
struct sockaddr_storage sockaddr;
socklen_t sockaddr_len;
char identifier[UNIX_PATH_MAX];
+ // OP-VERSION of clients
+ uint32_t max_op_version;
+ uint32_t min_op_version;
+ //Volume mounted by client
+ char volname[1024];
};
typedef struct peer_info peer_info_t;
@@ -181,16 +186,19 @@ struct rpc_transport {
*/
void *private;
- void *xl_private;
+ struct _client_t *xl_private;
void *xl; /* Used for THIS */
void *mydata;
pthread_mutex_t lock;
int32_t refcount;
+ int32_t outstanding_rpc_count;
+
glusterfs_ctx_t *ctx;
dict_t *options;
char *name;
void *dnscache;
+ void *drc_client;
data_t *buf;
int32_t (*init) (rpc_transport_t *this);
void (*fini) (rpc_transport_t *this);
@@ -205,6 +213,7 @@ struct rpc_transport {
struct list_head list;
int bind_insecure;
+ void *dl_handle; /* handle of dlopen() */
};
struct rpc_transport_ops {
@@ -228,6 +237,7 @@ struct rpc_transport_ops {
int32_t (*get_myaddr) (rpc_transport_t *this, char *peeraddr,
int addrlen, struct sockaddr_storage *sa,
socklen_t sasize);
+ int32_t (*throttle) (rpc_transport_t *this, gf_boolean_t onoff);
};
@@ -267,9 +277,6 @@ int
rpc_transport_register_notify (rpc_transport_t *trans, rpc_transport_notify_t,
void *mydata);
-int
-rpc_transport_unregister_notify (rpc_transport_t *trans);
-
int32_t
rpc_transport_get_peername (rpc_transport_t *this, char *hostname, int hostlen);
@@ -284,6 +291,9 @@ int32_t
rpc_transport_get_myaddr (rpc_transport_t *this, char *peeraddr, int addrlen,
struct sockaddr_storage *sa, size_t salen);
+int
+rpc_transport_throttle (rpc_transport_t *this, gf_boolean_t onoff);
+
rpc_transport_pollin_t *
rpc_transport_pollin_alloc (rpc_transport_t *this, struct iovec *vector,
int count, struct iobuf *hdr_iobuf,
@@ -296,5 +306,9 @@ rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
int32_t time);
int
+rpc_transport_unix_options_build (dict_t **options, char *filepath,
+ int frame_timeout);
+
+int
rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port);
#endif /* __RPC_TRANSPORT_H__ */
diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c
index 3a46cc498..4cb86a758 100644
--- a/rpc/rpc-lib/src/rpcsvc-auth.c
+++ b/rpc/rpc-lib/src/rpcsvc-auth.c
@@ -178,6 +178,29 @@ err:
}
+/*
+ * Load the "rpc-auth.addr.namelookup" boolean from @options into
+ * svc->addr_namelookup. A missing or unparsable value leaves the feature
+ * disabled.
+ *
+ * Returns -1 when either argument is NULL, 0 otherwise.
+ */
int
+rpcsvc_set_addr_namelookup (rpcsvc_t *svc, dict_t *options)
+{
+        static char *lookup_key = "rpc-auth.addr.namelookup";
+        int          enabled    = 0;
+
+        if ((svc == NULL) || (options == NULL))
+                return (-1);
+
+        /* Disabled unless the option explicitly turns it on */
+        enabled = dict_get_str_boolean (options, lookup_key, _gf_false);
+        svc->addr_namelookup = (enabled < 0) ? _gf_false : enabled;
+
+        if (svc->addr_namelookup)
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Addr-Name lookup enabled");
+
+        return (0);
+}
+
+int
rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options)
{
int ret = -1;
@@ -204,6 +227,26 @@ rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options)
}
+/*
+ * Load the "root-squash" boolean from @options into svc->root_squash.
+ * When the lookup reports -1 the setting falls back to off.
+ *
+ * Always returns 0.
+ */
int
+rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options)
+{
+        int squash = -1;
+
+        GF_ASSERT (svc);
+        GF_ASSERT (options);
+
+        squash = dict_get_str_boolean (options, "root-squash", 0);
+        svc->root_squash = (squash == -1) ? _gf_false : squash;
+
+        if (svc->root_squash)
+                gf_log (GF_RPCSVC, GF_LOG_DEBUG, "root squashing enabled ");
+
+        return 0;
+}
+
+int
rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options)
{
int ret = -1;
@@ -212,6 +255,8 @@ rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options)
return -1;
(void) rpcsvc_set_allow_insecure (svc, options);
+ (void) rpcsvc_set_root_squash (svc, options);
+ (void) rpcsvc_set_addr_namelookup (svc, options);
ret = rpcsvc_auth_add_initers (svc);
if (ret == -1) {
gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
@@ -228,6 +273,25 @@ out:
return ret;
}
+/*
+ * Re-read the auth related settings (allow-insecure, root-squash and
+ * address name lookup) from @options, in that order.
+ *
+ * Returns -1 on NULL arguments or as soon as one of the first two setters
+ * reports failure; otherwise the result of rpcsvc_set_addr_namelookup().
+ */
+int
+rpcsvc_auth_reconf (rpcsvc_t *svc, dict_t *options)
+{
+        if ((svc == NULL) || (options == NULL))
+                return (-1);
+
+        if (rpcsvc_set_allow_insecure (svc, options) != 0)
+                return (-1);
+
+        if (rpcsvc_set_root_squash (svc, options) != 0)
+                return (-1);
+
+        return rpcsvc_set_addr_namelookup (svc, options);
+}
+
rpcsvc_auth_t *
__rpcsvc_auth_get_handler (rpcsvc_request_t *req)
@@ -306,6 +370,9 @@ rpcsvc_auth_request_init (rpcsvc_request_t *req)
if (!auth->authops->request_init)
ret = auth->authops->request_init (req, auth->authprivate);
+ req->auxgids = req->auxgidsmall; /* reset to auxgidlarge during
+ unserialize if necessary */
+ req->auxgidlarge = NULL;
err:
return ret;
}
@@ -345,14 +412,10 @@ err:
int
rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
{
- int count = 0;
- int gen = RPCSVC_AUTH_REJECT;
- int spec = RPCSVC_AUTH_REJECT;
- int final = RPCSVC_AUTH_REJECT;
- char *srchstr = NULL;
- char *valstr = NULL;
- gf_boolean_t boolval = _gf_false;
- int ret = 0;
+ int count = 0;
+ int result = RPCSVC_AUTH_REJECT;
+ char *srchstr = NULL;
+ int ret = 0;
struct rpcsvc_auth_list *auth = NULL;
struct rpcsvc_auth_list *tmp = NULL;
@@ -370,59 +433,27 @@ rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
if (count >= arrlen)
break;
- gen = gf_asprintf (&srchstr, "rpc-auth.%s", auth->name);
- if (gen == -1) {
+ result = gf_asprintf (&srchstr, "rpc-auth.%s.%s",
+ auth->name, volname);
+ if (result == -1) {
count = -1;
goto err;
}
- gen = RPCSVC_AUTH_REJECT;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- gen = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
+ ret = dict_get_str_boolean (svc->options, srchstr, 0xC00FFEE);
GF_FREE (srchstr);
- spec = gf_asprintf (&srchstr, "rpc-auth.%s.%s", auth->name,
- volname);
- if (spec == -1) {
- count = -1;
- goto err;
- }
- spec = RPCSVC_AUTH_DONTCARE;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- spec = RPCSVC_AUTH_ACCEPT;
- else
- spec = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
- GF_FREE (srchstr);
- final = rpcsvc_combine_gen_spec_volume_checks (gen, spec);
- if (final == RPCSVC_AUTH_ACCEPT) {
+ switch (ret) {
+ case _gf_true:
+ result = RPCSVC_AUTH_ACCEPT;
autharr[count] = auth->auth->authnum;
++count;
+ break;
+ case _gf_false:
+ result = RPCSVC_AUTH_REJECT;
+ break;
+ default:
+ result = RPCSVC_AUTH_DONTCARE;
}
}
diff --git a/rpc/rpc-lib/src/rpcsvc-common.h b/rpc/rpc-lib/src/rpcsvc-common.h
index 81f798116..aed55e039 100644
--- a/rpc/rpc-lib/src/rpcsvc-common.h
+++ b/rpc/rpc-lib/src/rpcsvc-common.h
@@ -30,6 +30,8 @@ struct rpcsvc_state;
typedef int (*rpcsvc_notify_t) (struct rpcsvc_state *, void *mydata,
rpcsvc_event_t, void *data);
+struct drc_globals;
+typedef struct drc_globals rpcsvc_drc_globals_t;
/* Contains global state required for all the RPC services.
*/
@@ -50,24 +52,75 @@ typedef struct rpcsvc_state {
dict_t *options;
/* Allow insecure ports. */
- int allow_insecure;
+ gf_boolean_t allow_insecure;
gf_boolean_t register_portmap;
+ gf_boolean_t root_squash;
glusterfs_ctx_t *ctx;
/* list of connections which will listen for incoming connections */
- struct list_head listeners;
+ struct list_head listeners;
/* list of programs registered with rpcsvc */
- struct list_head programs;
+ struct list_head programs;
/* list of notification callbacks */
- struct list_head notify;
- int notify_count;
+ struct list_head notify;
+ int notify_count;
void *mydata; /* This is xlator */
- rpcsvc_notify_t notifyfn;
+ rpcsvc_notify_t notifyfn;
struct mem_pool *rxpool;
+ rpcsvc_drc_globals_t *drc;
+
+ /* per-client limit of outstanding rpc requests */
+ int outstanding_rpc_limit;
+ gf_boolean_t addr_namelookup;
} rpcsvc_t;
+/* DRC START */
+/* Classification of an operation for the duplicate request cache.
+ * NOTE(review): presumably DRC_NA marks ops the cache ignores -- confirm
+ * against the users of this enum. */
+enum drc_op_type {
+ DRC_NA = 0,
+ DRC_IDEMPOTENT = 1,
+ DRC_NON_IDEMPOTENT = 2
+};
+typedef enum drc_op_type drc_op_type_t;
+
+/* Kind of cache backing the DRC; DRC_TYPE_IN_MEMORY is the default
+ * (see DRC_DEFAULT_TYPE below). */
+enum drc_type {
+ DRC_TYPE_NONE = 0,
+ DRC_TYPE_IN_MEMORY = 1
+};
+typedef enum drc_type drc_type_t;
+
+/* Each value equals 100 divided by the percentage in its name
+ * (e.g. 20 <-> 5%). NOTE(review): presumably used as a divisor of the
+ * cache size when evicting -- confirm against rpc-drc.c. */
+enum drc_lru_factor {
+ DRC_LRU_5_PC = 20,
+ DRC_LRU_10_PC = 10,
+ DRC_LRU_25_PC = 4,
+ DRC_LRU_50_PC = 2
+};
+typedef enum drc_lru_factor drc_lru_factor_t;
+
+enum drc_xid_state {
+ DRC_XID_MONOTONOUS = 0,
+ DRC_XID_WRAPPED = 1
+};
+typedef enum drc_xid_state drc_xid_state_t;
+
+/* State of a cached op: a duplicate of an IN_TRANSIT op is dropped,
+ * a duplicate of a CACHED op is answered from the cache. */
+enum drc_op_state {
+ DRC_OP_IN_TRANSIT = 0,
+ DRC_OP_CACHED = 1
+};
+typedef enum drc_op_state drc_op_state_t;
+
+enum drc_policy {
+ DRC_LRU = 0
+};
+typedef enum drc_policy drc_policy_t;
+
+/* Default policies for DRC */
+#define DRC_DEFAULT_TYPE DRC_TYPE_IN_MEMORY
+/* used when "nfs.drc-size" is not set; the unit is not visible here */
+#define DRC_DEFAULT_CACHE_SIZE 0x20000
+#define DRC_DEFAULT_LRU_FACTOR DRC_LRU_25_PC
+
+/* DRC END */
#endif /* #ifndef _RPCSVC_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index 98cc88d63..037c157f2 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -27,6 +27,8 @@
#include "xdr-common.h"
#include "xdr-generic.h"
#include "rpc-common-xdr.h"
+#include "syncop.h"
+#include "rpc-drc.h"
#include <errno.h>
#include <pthread.h>
@@ -40,8 +42,7 @@
#include <stdio.h>
#include "xdr-rpcclnt.h"
-
-#define ACL_PROGRAM 100227
+#include "glusterfs-acl.h"
struct rpcsvc_program gluster_dump_prog;
@@ -128,6 +129,37 @@ rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
return NULL;
}
+/*
+ * Adjust the per-transport count of outstanding rpc requests by @delta
+ * and throttle/unthrottle the transport when the count crosses
+ * svc->outstanding_rpc_limit. The update happens under trans->lock.
+ *
+ * A limit of 0 disables the accounting altogether.
+ *
+ * Returns 0 when no throttle state change was needed, otherwise the
+ * result of rpc_transport_throttle().
+ */
+int
+rpcsvc_request_outstanding (rpcsvc_t *svc, rpc_transport_t *trans, int delta)
+{
+        int     ret    = 0;
+        int     before = 0;
+        int     after  = 0;
+        int     limit  = 0;
+
+        pthread_mutex_lock (&trans->lock);
+        {
+                limit = svc->outstanding_rpc_limit;
+                if (limit != 0) {
+                        before = trans->outstanding_rpc_count;
+                        trans->outstanding_rpc_count += delta;
+                        after = trans->outstanding_rpc_count;
+
+                        if ((before <= limit) && (after > limit)) {
+                                /* went above the limit: stop reading */
+                                ret = rpc_transport_throttle (trans,
+                                                              _gf_true);
+                        } else if ((before > limit) && (after <= limit)) {
+                                /* dropped back to the limit: resume */
+                                ret = rpc_transport_throttle (trans,
+                                                              _gf_false);
+                        }
+                }
+        }
+        pthread_mutex_unlock (&trans->lock);
+
+        return ret;
+}
+
+
/* This needs to change to returning errors, since
* we need to return RPC specific error messages when some
* of the pointers below are NULL.
@@ -206,6 +238,8 @@ rpcsvc_program_actor (rpcsvc_request_t *req)
goto err;
}
+ req->synctask = program->synctask;
+
err = SUCCESS;
gf_log (GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s",
program->progname, actor->procname);
@@ -220,7 +254,7 @@ err:
/* this procedure can only pass 4 arguments to registered notifyfn. To send more
* arguments call wrapper->notify directly.
*/
-inline void
+static inline void
rpcsvc_program_notify (rpcsvc_listener_t *listener, rpcsvc_event_t event,
void *data)
{
@@ -243,7 +277,7 @@ out:
}
-inline int
+static inline int
rpcsvc_accept (rpcsvc_t *svc, rpc_transport_t *listen_trans,
rpc_transport_t *new_trans)
{
@@ -273,8 +307,20 @@ rpcsvc_request_destroy (rpcsvc_request_t *req)
iobref_unref (req->iobref);
}
+ if (req->hdr_iobuf)
+ iobuf_unref (req->hdr_iobuf);
+
+ /* This marks the "end" of an RPC request. Reply is
+ completely written to the socket and is on the way
+ to the client. It is time to decrement the
+ outstanding request counter by 1.
+ */
+ rpcsvc_request_outstanding (req->svc, req->trans, -1);
+
rpc_transport_unref (req->trans);
+ GF_FREE (req->auxgidlarge);
+
mem_put (req);
out:
@@ -357,6 +403,12 @@ rpcsvc_request_create (rpcsvc_t *svc, rpc_transport_t *trans,
goto err;
}
+ /* We just received a new request from the wire. Account for
+ it in the outstanding request counter to make sure we don't
+ ingest too many concurrent requests from the same client.
+ */
+ ret = rpcsvc_request_outstanding (svc, trans, +1);
+
msgbuf = msg->vector[0].iov_base;
msglen = msg->vector[0].iov_len;
@@ -416,6 +468,7 @@ rpcsvc_request_create (rpcsvc_t *svc, rpc_transport_t *trans,
* since we are not handling authentication failures for now.
*/
req->rpc_status = MSG_ACCEPTED;
+ req->reply = NULL;
ret = 0;
err:
if (ret == -1) {
@@ -431,15 +484,39 @@ err:
+/*
+ * Completion handler for an rpc actor: log a non-zero actor result and,
+ * for RPCSVC_ACTOR_ERROR, queue an error reply for the request.
+ *
+ * @ret:    result of the actor
+ * @frame:  unused
+ * @opaque: the rpcsvc_request_t the actor was run for
+ *
+ * Always returns 0.
+ */
int
+rpcsvc_check_and_reply_error (int ret, call_frame_t *frame, void *opaque)
+{
+        rpcsvc_request_t *req = opaque;
+
+        if (ret != 0)
+                gf_log ("rpcsvc", GF_LOG_ERROR,
+                        "rpc actor failed to complete successfully");
+
+        if (ret != RPCSVC_ACTOR_ERROR)
+                return 0;
+
+        if (rpcsvc_error_reply (req))
+                gf_log ("rpcsvc", GF_LOG_WARNING,
+                        "failed to queue error reply");
+
+        return 0;
+}
+
+int
rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans,
rpc_transport_pollin_t *msg)
{
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_request_t *req = NULL;
- int ret = -1;
- uint16_t port = 0;
- gf_boolean_t is_unix = _gf_false;
- gf_boolean_t unprivileged = _gf_false;
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_actor actor_fn = NULL;
+ rpcsvc_request_t *req = NULL;
+ int ret = -1;
+ uint16_t port = 0;
+ gf_boolean_t is_unix = _gf_false;
+ gf_boolean_t unprivileged = _gf_false;
+ drc_cached_op_t *reply = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
if (!trans || !svc)
return -1;
@@ -475,7 +552,7 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans,
req = rpcsvc_request_create (svc, trans, msg);
if (!req)
- goto err;
+ goto out;
if (!rpcsvc_request_accepted (req))
goto err_reply;
@@ -493,40 +570,76 @@ rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans,
return -1;
}
+ /* DRC */
+ if (rpcsvc_need_drc (req)) {
+ drc = req->svc->drc;
+
+ LOCK (&drc->lock);
+ reply = rpcsvc_drc_lookup (req);
+
+ /* retransmission of completed request, send cached reply */
+ if (reply && reply->state == DRC_OP_CACHED) {
+ gf_log (GF_RPCSVC, GF_LOG_INFO, "duplicate request:"
+ " XID: 0x%x", req->xid);
+ ret = rpcsvc_send_cached_reply (req, reply);
+ drc->cache_hits++;
+ UNLOCK (&drc->lock);
+ goto out;
+
+ } /* retransmitted request, original op in transit, drop it */
+ else if (reply && reply->state == DRC_OP_IN_TRANSIT) {
+ gf_log (GF_RPCSVC, GF_LOG_INFO, "op in transit,"
+ " discarding. XID: 0x%x", req->xid);
+ ret = 0;
+ drc->intransit_hits++;
+ rpcsvc_request_destroy (req);
+ UNLOCK (&drc->lock);
+ goto out;
+
+ } /* fresh request, cache it as in-transit and proceed */
+ else {
+ ret = rpcsvc_cache_request (req);
+ }
+ UNLOCK (&drc->lock);
+ }
+
if (req->rpc_err == SUCCESS) {
/* Before going to xlator code, set the THIS properly */
THIS = svc->mydata;
- if (req->count == 2) {
- if (actor->vector_actor) {
- ret = actor->vector_actor (req, &req->msg[1], 1,
- req->iobref);
- } else {
- rpcsvc_request_seterr (req, PROC_UNAVAIL);
- /* LOG TODO: print more info about procnum,
- prognum etc, also print transport info */
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "No vectored handler present");
- ret = RPCSVC_ACTOR_ERROR;
- }
- } else if (actor->actor) {
- ret = actor->actor (req);
+ actor_fn = actor->actor;
+
+ if (!actor_fn) {
+ rpcsvc_request_seterr (req, PROC_UNAVAIL);
+ /* LOG TODO: print more info about procnum,
+ prognum etc, also print transport info */
+ gf_log (GF_RPCSVC, GF_LOG_ERROR,
+ "No vectored handler present");
+ ret = RPCSVC_ACTOR_ERROR;
+ goto err_reply;
}
- }
-err_reply:
- if (ret == RPCSVC_ACTOR_ERROR) {
- ret = rpcsvc_error_reply (req);
+ if (req->synctask) {
+ if (msg->hdr_iobuf)
+ req->hdr_iobuf = iobuf_ref (msg->hdr_iobuf);
+
+ ret = synctask_new (THIS->ctx->env,
+ (synctask_fn_t) actor_fn,
+ rpcsvc_check_and_reply_error, NULL,
+ req);
+ } else {
+ ret = actor_fn (req);
+ }
}
- if (ret)
- gf_log ("rpcsvc", GF_LOG_WARNING, "failed to queue error reply");
+err_reply:
+ ret = rpcsvc_check_and_reply_error (ret, NULL, req);
/* No need to propagate error beyond this function since the reply
* has now been queued. */
ret = 0;
-err:
+out:
return ret;
}
@@ -687,7 +800,7 @@ err:
return txrecord;
}
-inline int
+static inline int
rpcsvc_get_callid (rpcsvc_t *rpc)
{
return GF_UNIVERSAL_ANSWER;
@@ -873,21 +986,22 @@ out:
return ret;
}
-inline int
-rpcsvc_transport_submit (rpc_transport_t *trans, struct iovec *hdrvec,
- int hdrcount, struct iovec *proghdr, int proghdrcount,
- struct iovec *progpayload, int progpayloadcount,
- struct iobref *iobref, void *priv)
+int
+rpcsvc_transport_submit (rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref,
+ void *priv)
{
int ret = -1;
rpc_transport_reply_t reply = {{0, }};
- if ((!trans) || (!hdrvec) || (!hdrvec->iov_base)) {
+ if ((!trans) || (!rpchdr) || (!rpchdr->iov_base)) {
goto out;
}
- reply.msg.rpchdr = hdrvec;
- reply.msg.rpchdrcount = hdrcount;
+ reply.msg.rpchdr = rpchdr;
+ reply.msg.rpchdrcount = rpchdrcount;
reply.msg.proghdr = proghdr;
reply.msg.proghdrcount = proghdrcount;
reply.msg.progpayload = progpayload;
@@ -1033,6 +1147,7 @@ rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
size_t msglen = 0;
size_t hdrlen = 0;
char new_iobref = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
if ((!req) || (!req->trans))
return -1;
@@ -1067,20 +1182,31 @@ rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
iobref_add (iobref, replyiob);
+ /* cache the request in the duplicate request cache for appropriate ops */
+ if (req->reply) {
+ drc = req->svc->drc;
+
+ LOCK (&drc->lock);
+ ret = rpcsvc_cache_reply (req, iobref, &recordhdr, 1,
+ proghdr, hdrcount,
+ payload, payloadcount);
+ UNLOCK (&drc->lock);
+ }
+
ret = rpcsvc_transport_submit (trans, &recordhdr, 1, proghdr, hdrcount,
payload, payloadcount, iobref,
req->trans_private);
if (ret == -1) {
gf_log (GF_RPCSVC, GF_LOG_ERROR, "failed to submit message "
- "(XID: 0x%ux, Program: %s, ProgVers: %d, Proc: %d) to "
+ "(XID: 0x%x, Program: %s, ProgVers: %d, Proc: %d) to "
"rpc-transport (%s)", req->xid,
req->prog ? req->prog->progname : "(not matched)",
req->prog ? req->prog->progver : 0,
req->procnum, trans->name);
} else {
gf_log (GF_RPCSVC, GF_LOG_TRACE,
- "submitted reply for rpc-message (XID: 0x%ux, "
+ "submitted reply for rpc-message (XID: 0x%x, "
"Program: %s, ProgVers: %d, Proc: %d) to rpc-transport "
"(%s)", req->xid, req->prog ? req->prog->progname: "-",
req->prog ? req->prog->progver : 0,
@@ -1123,12 +1249,13 @@ rpcsvc_error_reply (rpcsvc_request_t *req)
inline int
rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port)
{
- int ret = 0;
+ int ret = -1; /* FAIL */
if (!newprog) {
goto out;
}
+ /* pmap_set() returns 0 for FAIL and 1 for SUCCESS */
if (!(pmap_set (newprog->prognum, newprog->progver, IPPROTO_TCP,
port))) {
gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not register with"
@@ -1136,7 +1263,7 @@ rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port)
goto out;
}
- ret = 0;
+ ret = 0; /* SUCCESS */
out:
return ret;
}
@@ -1145,7 +1272,7 @@ out:
inline int
rpcsvc_program_unregister_portmap (rpcsvc_program_t *prog)
{
- int ret = 0;
+ int ret = -1;
if (!prog)
goto out;
@@ -1502,6 +1629,7 @@ rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name)
}
GF_FREE (transport_name);
+ transport_name = NULL;
count++;
}
@@ -1513,17 +1641,13 @@ rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name)
transport_type = NULL;
out:
- if (str != NULL) {
- GF_FREE (str);
- }
+ GF_FREE (str);
- if (transport_type != NULL) {
- GF_FREE (transport_type);
- }
+ GF_FREE (transport_type);
- if (tmp != NULL) {
- GF_FREE (tmp);
- }
+ GF_FREE (tmp);
+
+ GF_FREE (transport_name);
return count;
}
@@ -1700,15 +1824,17 @@ rpcsvc_dump (rpcsvc_request_t *req)
uint32_t dump_rsp_len = 0;
if (!req)
- goto fail;
+ goto sendrsp;
ret = build_prog_details (req, &rsp);
if (ret < 0) {
op_errno = -ret;
- goto fail;
+ goto sendrsp;
}
-fail:
+ op_errno = 0;
+
+sendrsp:
rsp.op_errno = gf_errno_to_error (op_errno);
rsp.op_ret = ret;
@@ -1720,15 +1846,12 @@ fail:
ret = xdr_serialize_generic (iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
if (ret < 0) {
- if (req)
- req->rpc_err = GARBAGE_ARGS;
- op_errno = EINVAL;
- goto fail;
+ ret = RPCSVC_ACTOR_ERROR;
+ } else {
+ rpcsvc_submit_generic (req, &iov, 1, NULL, 0, NULL);
+ ret = 0;
}
- ret = rpcsvc_submit_generic (req, &iov, 1, NULL, 0,
- NULL);
-
free_prog_details (&rsp);
return ret;
@@ -1767,12 +1890,92 @@ rpcsvc_init_options (rpcsvc_t *svc, dict_t *options)
gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Portmap registration "
"disabled");
- ret = 0;
+ ret = rpcsvc_set_outstanding_rpc_limit (svc, options);
out:
return ret;
}
+/*
+ * For every child volume of @xlator, copy the value stored under
+ * "rpc-auth.addr.<volname>.<suffix>" in @options into svc->options,
+ * replacing any previous value. Keys absent from @options are left as
+ * they are in svc->options.
+ *
+ * Returns 0 on success, -1 when building the key or storing the value
+ * fails.
+ */
+static int
+rpcsvc_reconfigure_addr_rule (rpcsvc_t *svc, dict_t *options,
+                              xlator_t *xlator, const char *suffix)
+{
+        xlator_list_t   *volentry = NULL;
+        char            *srchkey  = NULL;
+        char            *keyval   = NULL;
+        int             ret       = -1;
+
+        volentry = xlator->children;
+        while (volentry) {
+                ret = gf_asprintf (&srchkey, "rpc-auth.addr.%s.%s",
+                                   volentry->xlator->name, suffix);
+                if (ret == -1) {
+                        gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+                        return (-1);
+                }
+
+                /* If found the srchkey, delete old key/val pair
+                 * and set the key with new value.
+                 */
+                if (!dict_get_str (options, srchkey, &keyval)) {
+                        dict_del (svc->options, srchkey);
+                        ret = dict_set_str (svc->options, srchkey, keyval);
+                        if (ret < 0) {
+                                gf_log (GF_RPCSVC, GF_LOG_ERROR,
+                                        "dict_set_str error");
+                                GF_FREE (srchkey);
+                                return (-1);
+                        }
+                }
+
+                GF_FREE (srchkey);
+                volentry = volentry->next;
+        }
+
+        return (0);
+}
+
+/*
+ * Reconfigure the rpc service from a new option dictionary: refresh the
+ * volume specific rpc-auth.addr allow and reject entries, then re-run
+ * rpcsvc_init_options() and rpcsvc_auth_reconf().
+ *
+ * Returns -1 on NULL arguments, when svc has no options or xlator
+ * attached, or when any step fails; otherwise the result of
+ * rpcsvc_auth_reconf().
+ */
int
+rpcsvc_reconfigure_options (rpcsvc_t *svc, dict_t *options)
+{
+        xlator_t        *xlator = NULL;
+
+        if ((!svc) || (!svc->options) || (!options))
+                return (-1);
+
+        /* Fetch the xlator from svc */
+        xlator = (xlator_t *) svc->mydata;
+        if (!xlator)
+                return (-1);
+
+        /* Reconfigure the volume specific rpc-auth.addr allow part */
+        if (rpcsvc_reconfigure_addr_rule (svc, options, xlator, "allow"))
+                return (-1);
+
+        /* Reconfigure the volume specific rpc-auth.addr reject part */
+        if (rpcsvc_reconfigure_addr_rule (svc, options, xlator, "reject"))
+                return (-1);
+
+        if (rpcsvc_init_options (svc, options))
+                return (-1);
+
+        return rpcsvc_auth_reconf (svc, options);
+}
+
+int
rpcsvc_transport_unix_options_build (dict_t **options, char *filepath)
{
dict_t *dict = NULL;
@@ -1811,14 +2014,55 @@ rpcsvc_transport_unix_options_build (dict_t **options, char *filepath)
*options = dict;
out:
if (ret) {
- if (fpath)
- GF_FREE (fpath);
+ GF_FREE (fpath);
if (dict)
dict_unref (dict);
}
return ret;
}
+/*
+ * Reconfigure() the rpc.outstanding-rpc-limit param from @options.
+ * A missing key or a negative value selects the default; larger values are
+ * capped at the maximum. Returns 0, or -1 when @svc or @options is NULL.
+ */
+int
+rpcsvc_set_outstanding_rpc_limit (rpcsvc_t *svc, dict_t *options)
+{
+        int          ret       = -1; /* FAILURE */
+        int          rpclim    = 0;
+        static char *rpclimkey = "rpc.outstanding-rpc-limit";
+
+        if ((!svc) || (!options))
+                return (-1);
+
+        ret = dict_get_int32 (options, rpclimkey, &rpclim);
+        if (ret < 0) {
+                /* Fall back to default for FAILURE */
+                rpclim = RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT;
+        } else if (rpclim < RPCSVC_MIN_OUTSTANDING_RPC_LIMIT) {
+                /* Boundary check comes first so that a small negative
+                 * value cannot round up to 0, which means unlimited.
+                 */
+                rpclim = RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT;
+        } else if (rpclim > RPCSVC_MAX_OUTSTANDING_RPC_LIMIT) {
+                /* Cap before rounding: rpclim + 7 must not overflow. */
+                rpclim = RPCSVC_MAX_OUTSTANDING_RPC_LIMIT;
+        } else {
+                /* In range: round up to a multiple of 8 (0 stays 0). */
+                rpclim = ((rpclim + 8 - 1) / 8) * 8;
+        }
+
+        if (svc->outstanding_rpc_limit != rpclim) {
+                svc->outstanding_rpc_limit = rpclim;
+                gf_log (GF_RPCSVC, GF_LOG_INFO,
+                        "Configured %s with value %d",
+                        rpclimkey, rpclim);
+        }
+
+        return (0);
+}
+
/* The global RPC service initializer.
*/
rpcsvc_t *
@@ -1879,6 +2123,7 @@ rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
"failed to register DUMP program");
goto free_svc;
}
+
ret = 0;
free_svc:
if (ret == -1) {
@@ -1891,18 +2136,16 @@ free_svc:
int
-rpcsvc_transport_peer_check_search (dict_t *options, char *pattern, char *clstr)
+rpcsvc_transport_peer_check_search (dict_t *options, char *pattern,
+ char *ip, char *hostname)
{
- int ret = -1;
- char *addrtok = NULL;
- char *addrstr = NULL;
- char *dup_addrstr = NULL;
- char *svptr = NULL;
-
- if ((!options) || (!clstr))
- return -1;
+ int ret = -1;
+ char *addrtok = NULL;
+ char *addrstr = NULL;
+ char *dup_addrstr = NULL;
+ char *svptr = NULL;
- if (!dict_get (options, pattern))
+ if ((!options) || (!ip))
return -1;
ret = dict_get_str (options, pattern, &addrstr);
@@ -1922,85 +2165,85 @@ rpcsvc_transport_peer_check_search (dict_t *options, char *pattern, char *clstr)
/* CASEFOLD not present on Solaris */
#ifdef FNM_CASEFOLD
- ret = fnmatch (addrtok, clstr, FNM_CASEFOLD);
+ ret = fnmatch (addrtok, ip, FNM_CASEFOLD);
#else
- ret = fnmatch (addrtok, clstr, 0);
+ ret = fnmatch (addrtok, ip, 0);
#endif
if (ret == 0)
goto err;
+ /* compare hostnames if applicable */
+ if (hostname) {
+#ifdef FNM_CASEFOLD
+ ret = fnmatch (addrtok, hostname, FNM_CASEFOLD);
+#else
+ ret = fnmatch (addrtok, hostname, 0);
+#endif
+ if (ret == 0)
+ goto err;
+ }
+
addrtok = strtok_r (NULL, ",", &svptr);
}
ret = -1;
err:
- if (dup_addrstr)
- GF_FREE (dup_addrstr);
+ GF_FREE (dup_addrstr);
return ret;
}
-int
-rpcsvc_transport_peer_check_allow (dict_t *options, char *volname, char *clstr)
+static int
+rpcsvc_transport_peer_check_allow (dict_t *options, char *volname,
+ char *ip, char *hostname)
{
- int ret = RPCSVC_AUTH_DONTCARE;
+ int ret = RPCSVC_AUTH_DONTCARE;
char *srchstr = NULL;
- char globalrule[] = "rpc-auth.addr.allow";
- if ((!options) || (!clstr))
+ if ((!options) || (!ip) || (!volname))
return ret;
- /* If volname is NULL, then we're searching for the general rule to
- * determine the current address in clstr is allowed or not for all
- * subvolumes.
- */
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_DONTCARE;
- goto out;
- }
- } else
- srchstr = globalrule;
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_DONTCARE;
+ goto out;
+ }
- ret = rpcsvc_transport_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
+ ret = rpcsvc_transport_peer_check_search (options, srchstr,
+ ip, hostname);
+ GF_FREE (srchstr);
if (ret == 0)
ret = RPCSVC_AUTH_ACCEPT;
else
- ret = RPCSVC_AUTH_DONTCARE;
+ ret = RPCSVC_AUTH_REJECT;
out:
return ret;
}
-int
-rpcsvc_transport_peer_check_reject (dict_t *options, char *volname, char *clstr)
+static int
+rpcsvc_transport_peer_check_reject (dict_t *options, char *volname,
+ char *ip, char *hostname)
{
- int ret = RPCSVC_AUTH_DONTCARE;
+ int ret = RPCSVC_AUTH_DONTCARE;
char *srchstr = NULL;
- char generalrule[] = "rpc-auth.addr.reject";
- if ((!options) || (!clstr))
+ if ((!options) || (!ip) || (!volname))
return ret;
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject",
- volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_REJECT;
- goto out;
- }
- } else
- srchstr = generalrule;
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject",
+ volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto out;
+ }
- ret = rpcsvc_transport_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
+ ret = rpcsvc_transport_peer_check_search (options, srchstr,
+ ip, hostname);
+ GF_FREE (srchstr);
if (ret == 0)
ret = RPCSVC_AUTH_REJECT;
@@ -2011,325 +2254,132 @@ out:
}
-/* This function tests the results of the allow rule and the reject rule to
- * combine them into a single result that can be used to determine if the
- * connection should be allowed to proceed.
- * Heres the test matrix we need to follow in this function.
- *
- * A - Allow, the result of the allow test. Never returns R.
- * R - Reject, result of the reject test. Never returns A.
- * Both can return D or dont care if no rule was given.
- *
- * | @allow | @reject | Result |
- * | A | R | R |
- * | D | D | D |
- * | A | D | A |
- * | D | R | R |
+/* Combines rpc auth's allow and reject options.
+ * Order of checks is important.
+ * First, REJECT if either rejects.
+ * If neither rejects, ACCEPT if either accepts.
+ * If neither accepts, DONTCARE
*/
int
rpcsvc_combine_allow_reject_volume_check (int allow, int reject)
{
- int final = RPCSVC_AUTH_REJECT;
-
- /* If allowed rule allows but reject rule rejects, we stay cautious
- * and reject. */
- if ((allow == RPCSVC_AUTH_ACCEPT) && (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* if both are dont care, that is user did not specify for either allow
- * or reject, we leave it up to the general rule to apply, in the hope
- * that there is one.
- */
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- /* If one is dont care, the other one applies. */
- else if ((allow == RPCSVC_AUTH_ACCEPT) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
-}
-
-
-/* Combines the result of the general rule test against, the specific rule
- * to determine final permission for the client's address.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | D |
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
- */
-int
-rpcsvc_combine_gen_spec_addr_checks (int gen, int spec)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
-}
-
+ if (allow == RPCSVC_AUTH_REJECT ||
+ reject == RPCSVC_AUTH_REJECT)
+ return RPCSVC_AUTH_REJECT;
+ if (allow == RPCSVC_AUTH_ACCEPT ||
+ reject == RPCSVC_AUTH_ACCEPT)
+ return RPCSVC_AUTH_ACCEPT;
-/* Combines the result of the general rule test against, the specific rule
- * to determine final test for the connection coming in for a given volume.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | R |, special case, we intentionally disallow this.
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
- */
-int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* On no rule, we reject. */
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
+ return RPCSVC_AUTH_DONTCARE;
}
-
int
-rpcsvc_transport_peer_check_name (dict_t *options, char *volname,
- rpc_transport_t *trans)
+rpcsvc_auth_check (rpcsvc_t *svc, char *volname,
+ rpc_transport_t *trans)
{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_REJECT;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
-
- if (!trans)
+ int ret = RPCSVC_AUTH_REJECT;
+ int accept = RPCSVC_AUTH_REJECT;
+ int reject = RPCSVC_AUTH_REJECT;
+ char *hostname = NULL;
+ char *ip = NULL;
+ char client_ip[RPCSVC_PEER_STRLEN] = {0};
+ char *allow_str = NULL;
+ char *reject_str = NULL;
+ char *srchstr = NULL;
+ dict_t *options = NULL;
+
+ if (!svc || !volname || !trans)
return ret;
- ret = rpcsvc_transport_peername (trans, clstr, RPCSVC_PEER_STRLEN);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
- "%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- aret = rpcsvc_transport_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_transport_peer_check_reject (options, volname, clstr);
-
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-
-err:
- return ret;
-}
-
-
-int
-rpcsvc_transport_peer_check_addr (dict_t *options, char *volname,
- rpc_transport_t *trans)
-{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_DONTCARE;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
- char *tmp = NULL;
- struct sockaddr_storage sastorage = {0,};
- struct sockaddr *sockaddr = NULL;
-
- if (!trans)
+ /* Fetch the options from svc struct and validate */
+ options = svc->options;
+ if (!options)
return ret;
- ret = rpcsvc_transport_peeraddr (trans, clstr, RPCSVC_PEER_STRLEN,
- &sastorage, sizeof (sastorage));
+ ret = rpcsvc_transport_peername (trans, client_ip, RPCSVC_PEER_STRLEN);
if (ret != 0) {
gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
"%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- sockaddr = (struct sockaddr *) &sastorage;
- switch (sockaddr->sa_family) {
-
- case AF_INET:
- case AF_INET6:
- tmp = strrchr (clstr, ':');
- if (tmp)
- *tmp = '\0';
- break;
- }
-
- aret = rpcsvc_transport_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_transport_peer_check_reject (options, volname, clstr);
-
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-err:
- return ret;
-}
-
-
-int
-rpcsvc_transport_check_volume_specific (dict_t *options, char *volname,
- rpc_transport_t *trans)
-{
- int namechk = RPCSVC_AUTH_REJECT;
- int addrchk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_false;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!volname) || (!trans))
return RPCSVC_AUTH_REJECT;
-
- /* Disabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
}
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
+ /* Accept if it's the default case: Allow all, Reject none.
+ * The default volfile always contains an 'allow *' rule
+ * for each volume. If allow rule is missing (which implies
+ * there is some bad volfile generating code doing this), we
+ * assume no one is allowed mounts, and thus, we reject mounts.
*/
- if (namelookup)
- namechk = rpcsvc_transport_peer_check_name (options, volname,
- trans);
- addrchk = rpcsvc_transport_peer_check_addr (options, volname, trans);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk,
- namechk);
- else
- ret = addrchk;
-
- return ret;
-}
-
-
-int
-rpcsvc_transport_check_volume_general (dict_t *options, rpc_transport_t *trans)
-{
- int addrchk = RPCSVC_AUTH_REJECT;
- int namechk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_false;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!trans))
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
return RPCSVC_AUTH_REJECT;
-
- /* Disabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
}
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
- */
- if (namelookup)
- namechk = rpcsvc_transport_peer_check_name (options, NULL, trans);
- addrchk = rpcsvc_transport_peer_check_addr (options, NULL, trans);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk, namechk);
- else
- ret = addrchk;
+ ret = dict_get_str (options, srchstr, &allow_str);
+ GF_FREE (srchstr);
+ if (ret < 0)
+ return RPCSVC_AUTH_REJECT;
- return ret;
-}
+ ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject", volname);
+ if (ret == -1) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return RPCSVC_AUTH_REJECT;
+ }
-int
-rpcsvc_transport_peer_check (dict_t *options, char *volname,
- rpc_transport_t *trans)
-{
- int general_chk = RPCSVC_AUTH_REJECT;
- int specific_chk = RPCSVC_AUTH_REJECT;
+ ret = dict_get_str (options, srchstr, &reject_str);
+ GF_FREE (srchstr);
+ if (reject_str == NULL && !strcmp ("*", allow_str))
+ return RPCSVC_AUTH_ACCEPT;
+
+ /* Non-default rule, authenticate */
+ if (!get_host_name (client_ip, &ip))
+ ip = client_ip;
+
+ /* addr-namelookup check */
+ if (svc->addr_namelookup == _gf_true) {
+ ret = gf_get_hostname_from_ip (ip, &hostname);
+ if (ret) {
+ if (hostname)
+ GF_FREE (hostname);
+ /* failed to get hostname, but hostname auth
+ * is enabled, so authentication will not be
+ * 100% correct. reject mounts
+ */
+ return RPCSVC_AUTH_REJECT;
+ }
+ }
- if ((!options) || (!volname) || (!trans))
- return RPCSVC_AUTH_REJECT;
+ accept = rpcsvc_transport_peer_check_allow (options, volname,
+ ip, hostname);
- general_chk = rpcsvc_transport_check_volume_general (options, trans);
- specific_chk = rpcsvc_transport_check_volume_specific (options, volname,
- trans);
+ reject = rpcsvc_transport_peer_check_reject (options, volname,
+ ip, hostname);
- return rpcsvc_combine_gen_spec_volume_checks (general_chk,
- specific_chk);
+ if (hostname)
+ GF_FREE (hostname);
+ return rpcsvc_combine_allow_reject_volume_check (accept, reject);
}
-
int
rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
rpc_transport_t *trans)
{
- struct sockaddr_storage sastorage = {0,};
- struct sockaddr_in *sa = NULL;
+ union gf_sock_union sock_union;
int ret = RPCSVC_AUTH_REJECT;
- socklen_t sasize = sizeof (sa);
+ socklen_t sinsize = sizeof (&sock_union.sin);
char *srchstr = NULL;
char *valstr = NULL;
- int globalinsecure = RPCSVC_AUTH_REJECT;
- int exportinsecure = RPCSVC_AUTH_DONTCARE;
uint16_t port = 0;
gf_boolean_t insecure = _gf_false;
+ memset (&sock_union, 0, sizeof (sock_union));
+
if ((!svc) || (!volname) || (!trans))
return ret;
- sa = (struct sockaddr_in*) &sastorage;
- ret = rpcsvc_transport_peeraddr (trans, NULL, 0, &sastorage,
- sasize);
+ ret = rpcsvc_transport_peeraddr (trans, NULL, 0, &sock_union.storage,
+ sinsize);
if (ret != 0) {
gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get peer addr: %s",
gai_strerror (ret));
@@ -2337,7 +2387,7 @@ rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
goto err;
}
- port = ntohs (sa->sin_port);
+ port = ntohs (sock_union.sin.sin_port);
gf_log (GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
/* If the port is already a privileged one, dont bother with checking
* options.
@@ -2348,23 +2398,6 @@ rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
}
/* Disabled by default */
- if ((dict_get (svc->options, "rpc-auth.ports.insecure"))) {
- ret = dict_get_str (svc->options, "rpc-auth.ports.insecure"
- , &srchstr);
- if (ret == 0) {
- ret = gf_string2boolean (srchstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- globalinsecure = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
-
- /* Disabled by default */
ret = gf_asprintf (&srchstr, "rpc-auth.ports.%s.insecure", volname);
if (ret == -1) {
gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
@@ -2372,25 +2405,22 @@ rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
goto err;
}
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- exportinsecure = RPCSVC_AUTH_ACCEPT;
- else
- exportinsecure = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
-
- ret = rpcsvc_combine_gen_spec_volume_checks (globalinsecure,
- exportinsecure);
+ ret = dict_get_str (svc->options, srchstr, &valstr);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+ " read rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = gf_string2boolean (valstr, &insecure);
+ if (ret) {
+ gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
+ " convert rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = insecure ? RPCSVC_AUTH_ACCEPT : RPCSVC_AUTH_REJECT;
+
if (ret == RPCSVC_AUTH_ACCEPT)
gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
else
@@ -2398,6 +2428,9 @@ rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
" allowed");
err:
+ if (srchstr)
+ GF_FREE (srchstr);
+
return ret;
}
@@ -2425,17 +2458,16 @@ rpcsvc_volume_allowed (dict_t *options, char *volname)
ret = dict_get_str (options, srchstr, &addrstr);
out:
- if (srchstr)
- GF_FREE (srchstr);
+ GF_FREE (srchstr);
return addrstr;
}
rpcsvc_actor_t gluster_dump_actors[] = {
- [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, NULL, 0},
- [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, NULL, 0},
- [GF_DUMP_MAXVALUE] = {"MAXVALUE", GF_DUMP_MAXVALUE, NULL, NULL, NULL, 0},
+ [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, 0, DRC_NA},
+ [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, 0, DRC_NA},
+ [GF_DUMP_MAXVALUE] = {"MAXVALUE", GF_DUMP_MAXVALUE, NULL, NULL, 0, DRC_NA},
};
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index 847ec1b33..cbc1f4226 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -38,6 +38,10 @@
#define MAX_IOVEC 16
#endif
+#define RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT 64
+#define RPCSVC_MAX_OUTSTANDING_RPC_LIMIT 65536
+#define RPCSVC_MIN_OUTSTANDING_RPC_LIMIT 0 /* No limit i.e. Unlimited */
+
#define GF_RPCSVC "rpc-service"
#define RPCSVC_THREAD_STACK_SIZE ((size_t)(1024 * GF_UNIT_KB))
@@ -48,6 +52,10 @@
#define RPCSVC_POOLCOUNT_MULT 64
#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
+#define RPC_ROOT_UID 0
+#define RPC_ROOT_GID 0
+#define RPC_NOBODY_UID 65534
+#define RPC_NOBODY_GID 65534
/* RPC Record States */
#define RPCSVC_READ_FRAGHDR 1
@@ -136,6 +144,9 @@ typedef struct rpcsvc_auth_data {
#define rpcsvc_auth_flavour(au) ((au).flavour)
+typedef struct drc_client drc_client_t;
+typedef struct drc_cached_op drc_cached_op_t;
+
/* The container for the RPC call handed up to an actor.
* Dynamically allocated. Lives till the call reply is completely
* transmitted.
@@ -174,7 +185,9 @@ struct rpcsvc_request {
/* Might want to move this to AUTH_UNIX specific state since this array
* is not available for every authentication scheme.
*/
- gid_t auxgids[GF_MAX_AUX_GROUPS];
+ gid_t *auxgids;
+ gid_t auxgidsmall[SMALL_GROUP_COUNT];
+ gid_t *auxgidlarge;
int auxgidcount;
@@ -224,6 +237,9 @@ struct rpcsvc_request {
*/
rpcsvc_auth_data_t verf;
+ /* Execute this request's actor function as a synctask? */
+ gf_boolean_t synctask;
+
/* Container for a RPC program wanting to store a temp
* request-specific item.
*/
@@ -231,6 +247,12 @@ struct rpcsvc_request {
/* Container for transport to store request-specific item */
void *trans_private;
+
+ /* we need to ref the 'iobuf' in case of 'synctasking' it */
+ struct iobuf *hdr_iobuf;
+
+ /* pointer to cached reply for use in DRC */
+ drc_cached_op_t *reply;
};
#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
@@ -238,8 +260,6 @@ struct rpcsvc_request {
#define rpcsvc_request_program_private(req) (((rpcsvc_program_t *)((req)->prog))->private)
#define rpcsvc_request_accepted(req) ((req)->rpc_status == MSG_ACCEPTED)
#define rpcsvc_request_accepted_success(req) ((req)->rpc_err == SUCCESS)
-#define rpcsvc_request_uid(req) ((req)->uid)
-#define rpcsvc_request_gid(req) ((req)->gid)
#define rpcsvc_request_prog_minauth(req) (rpcsvc_request_program(req)->min_auth)
#define rpcsvc_request_cred_flavour(req) (rpcsvc_auth_flavour(req->cred))
#define rpcsvc_request_verf_flavour(req) (rpcsvc_auth_flavour(req->verf))
@@ -257,7 +277,22 @@ struct rpcsvc_request {
#define rpcsvc_request_vecstate(req) ((req)->vecstate)
#define rpcsvc_request_transport(req) ((req)->trans)
#define rpcsvc_request_transport_ref(req) (rpc_transport_ref((req)->trans))
-
+/* Remap root credentials on @req to the nobody ids when its service has
+ * root_squash set: uid, gid, and every zero entry in auxgids[0..auxgidcount).
+ * Expands to a single statement; the caller supplies the trailing ';'.
+ * NB: @req is evaluated more than once, so pass a side-effect-free expression.
+ */
+#define RPC_AUTH_ROOT_SQUASH(req)                                       \
+        do {                                                            \
+                int gidcount = 0;                                       \
+                if ((req)->svc->root_squash) {                          \
+                        if ((req)->uid == RPC_ROOT_UID)                 \
+                                (req)->uid = RPC_NOBODY_UID;            \
+                        if ((req)->gid == RPC_ROOT_GID)                 \
+                                (req)->gid = RPC_NOBODY_GID;            \
+                        for (gidcount = 0;                              \
+                             gidcount < (req)->auxgidcount;             \
+                             ++gidcount) {                              \
+                                if (!(req)->auxgids[gidcount])          \
+                                        (req)->auxgids[gidcount] =      \
+                                                RPC_NOBODY_GID;         \
+                        }                                               \
+                }                                                       \
+        } while (0)
#define RPCSVC_ACTOR_SUCCESS 0
#define RPCSVC_ACTOR_ERROR (-1)
@@ -276,8 +311,6 @@ struct rpcsvc_request {
*
*/
typedef int (*rpcsvc_actor) (rpcsvc_request_t *req);
-typedef int (*rpcsvc_vector_actor) (rpcsvc_request_t *req, struct iovec *vec,
- int count, struct iobref *iobref);
typedef int (*rpcsvc_vector_sizer) (int state, ssize_t *readsize,
char *base_addr, char *curr_addr);
@@ -293,7 +326,6 @@ typedef void *(*rpcsvc_encode_reply) (void *msg);
*/
typedef void (*rpcsvc_deallocate_reply) (void *msg);
-
#define RPCSVC_NAME_MAX 32
/* The descriptor for each procedure/actor that runs
* over the RPC service.
@@ -311,11 +343,11 @@ typedef struct rpcsvc_actor_desc {
* handler for letting the RPC program read the data from the network
* directly into its aligned buffers.
*/
- rpcsvc_vector_actor vector_actor;
rpcsvc_vector_sizer vector_sizer;
/* Can actor be ran on behalf an unprivileged requestor? */
gf_boolean_t unprivileged;
+ drc_op_type_t op_type;
} rpcsvc_actor_t;
/* Describes a program and its version along with the function pointers
@@ -370,6 +402,9 @@ struct rpcsvc_program {
*/
int min_auth;
+ /* Execute actor function as a synctask? */
+ gf_boolean_t synctask;
+
/* list member to link to list of registered services with rpcsvc */
struct list_head program;
};
@@ -406,6 +441,9 @@ extern int
rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port);
extern int
+rpcsvc_program_unregister_portmap (rpcsvc_program_t *newprog);
+
+extern int
rpcsvc_register_portmap_enabled (rpcsvc_t *svc);
/* Inits the global RPC service data structures.
@@ -415,6 +453,9 @@ extern rpcsvc_t *
rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
uint32_t poolcount);
+extern int
+rpcsvc_reconfigure_options (rpcsvc_t *svc, dict_t *options);
+
int
rpcsvc_register_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
@@ -425,6 +466,13 @@ int
rpcsvc_unregister_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
int
+rpcsvc_transport_submit (rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref,
+ void *priv);
+
+int
rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec *proghdr,
int hdrcount, struct iovec *payload, int payloadcount,
struct iobref *iobref);
@@ -445,17 +493,17 @@ rpcsvc_error_reply (rpcsvc_request_t *req);
extern int
rpcsvc_transport_peername (rpc_transport_t *trans, char *hostname, int hostlen);
-extern inline int
+extern int
rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
struct sockaddr_storage *returnsa, socklen_t sasize);
extern int
-rpcsvc_transport_peer_check (dict_t *options, char *volname,
- rpc_transport_t *trans);
+rpcsvc_auth_check (rpcsvc_t *svc, char *volname, rpc_transport_t *trans);
extern int
rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
rpc_transport_t *trans);
+
#define rpcsvc_request_seterr(req, err) (req)->rpc_err = err
#define rpcsvc_request_set_autherr(req, err) (req)->auth_err = err
@@ -511,6 +559,9 @@ extern int
rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options);
extern int
+rpcsvc_auth_reconf (rpcsvc_t *svc, dict_t *options);
+
+extern int
rpcsvc_auth_transport_init (rpc_transport_t *xprt);
extern int
@@ -527,9 +578,6 @@ rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
extern gid_t *
rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen);
-extern int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec);
-
extern char *
rpcsvc_volume_allowed (dict_t *options, char *volname);
@@ -537,16 +585,22 @@ int rpcsvc_callback_submit (rpcsvc_t *rpc, rpc_transport_t *trans,
rpcsvc_cbk_program_t *prog, int procnum,
struct iovec *proghdr, int proghdrcount);
+rpcsvc_actor_t *
+rpcsvc_program_actor (rpcsvc_request_t *req);
+
int
rpcsvc_transport_unix_options_build (dict_t **options, char *filepath);
int
rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options);
int
+rpcsvc_set_addr_namelookup (rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_outstanding_rpc_limit (rpcsvc_t *svc, dict_t *options);
+int
rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
-char *
-rpcsvc_volume_allowed (dict_t *options, char *volname);
rpcsvc_vector_sizer
rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
uint32_t progver, uint32_t procnum);
-
#endif
diff --git a/rpc/rpc-lib/src/xdr-rpc.c b/rpc/rpc-lib/src/xdr-rpc.c
index ef52764c3..adb48a531 100644
--- a/rpc/rpc-lib/src/xdr-rpc.c
+++ b/rpc/rpc-lib/src/xdr-rpc.c
@@ -34,7 +34,7 @@ xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
struct iovec *payload, char *credbytes, char *verfbytes)
{
XDR xdr;
- char opaquebytes[MAX_AUTH_BYTES];
+ char opaquebytes[GF_MAX_AUTH_BYTES];
struct opaque_auth *oa = NULL;
int ret = -1;
diff --git a/rpc/rpc-transport/rdma/src/Makefile.am b/rpc/rpc-transport/rdma/src/Makefile.am
index b4b940bca..2bf7cf238 100644
--- a/rpc/rpc-transport/rdma/src/Makefile.am
+++ b/rpc/rpc-transport/rdma/src/Makefile.am
@@ -3,18 +3,20 @@
transport_LTLIBRARIES = rdma.la
transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
-rdma_la_LDFLAGS = -module -avoidversion
+rdma_la_LDFLAGS = -module -avoid-version
rdma_la_SOURCES = rdma.c name.c
rdma_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -libverbs
+ -libverbs -lrdmacm
noinst_HEADERS = rdma.h name.h
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
-I$(top_srcdir)/xlators/protocol/lib/src/ -shared -nostartfiles $(GF_CFLAGS)
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/rpc/xdr/src -shared -nostartfiles $(GF_CFLAGS)
+ -I$(top_srcdir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES = *~
diff --git a/rpc/rpc-transport/rdma/src/name.c b/rpc/rpc-transport/rdma/src/name.c
index e5f7ba2ca..c57428ad6 100644
--- a/rpc/rpc-transport/rdma/src/name.c
+++ b/rpc/rpc-transport/rdma/src/name.c
@@ -13,12 +13,7 @@
#include <errno.h>
#include <netdb.h>
#include <string.h>
-
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
+#include <rdma/rdma_cma.h>
#ifndef AF_INET_SDP
#define AF_INET_SDP 27
@@ -26,6 +21,8 @@
#include "rpc-transport.h"
#include "rdma.h"
+#include "common-utils.h"
+
int32_t
gf_resolve_ip6 (const char *hostname,
@@ -35,28 +32,43 @@ gf_resolve_ip6 (const char *hostname,
struct addrinfo **addr_info);
static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
+af_inet_bind_to_port_lt_ceiling (struct rdma_cm_id *cm_id,
+ struct sockaddr *sockaddr,
socklen_t sockaddr_len, int ceiling)
{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
+ int32_t ret = -1;
+ uint16_t port = ceiling - 1;
+ // by default assume none of the ports are blocked and all are available
+ gf_boolean_t ports[1024] = {_gf_false,};
+ int i = 0;
+
+ ret = gf_process_reserved_ports (ports);
+ if (ret != 0) {
+ for (i = 0; i < 1024; i++)
+ ports[i] = _gf_false;
+ }
while (port)
{
switch (sockaddr->sa_family)
{
case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
+ ((struct sockaddr_in6 *)sockaddr)->sin6_port
+ = htons (port);
break;
case AF_INET_SDP:
case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
+ ((struct sockaddr_in *)sockaddr)->sin_port
+ = htons (port);
break;
}
-
- ret = bind (fd, sockaddr, sockaddr_len);
+ // ignore the reserved ports
+ if (ports[port] == _gf_true) {
+ port--;
+ continue;
+ }
+ ret = rdma_bind_addr (cm_id, sockaddr);
if (ret == 0)
break;
@@ -70,11 +82,10 @@ af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
return ret;
}
+#if 0
static int32_t
-af_unix_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
+af_unix_client_bind (rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t sockaddr_len, struct rdma_cm_id *cm_id)
{
data_t *path_data = NULL;
struct sockaddr_un *addr = NULL;
@@ -106,6 +117,7 @@ af_unix_client_bind (rpc_transport_t *this,
err:
return ret;
}
+#endif
static int32_t
client_fill_address_family (rpc_transport_t *this, struct sockaddr *sockaddr)
@@ -153,13 +165,11 @@ client_fill_address_family (rpc_transport_t *this, struct sockaddr *sockaddr)
sockaddr->sa_family = AF_INET6;
} else if (!strcasecmp (address_family, "inet-sdp")) {
sockaddr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- sockaddr->sa_family = AF_UNSPEC;
} else {
gf_log (this->name, GF_LOG_ERROR,
"unknown address-family (%s) specified",
address_family);
+ sockaddr->sa_family = AF_UNSPEC;
return -1;
}
}
@@ -354,6 +364,8 @@ af_inet_server_get_local_sockaddr (rpc_transport_t *this,
if (listen_port_data) {
listen_port = data_to_uint16 (listen_port_data);
} else {
+ listen_port = GF_DEFAULT_RDMA_LISTEN_PORT;
+
if (addr->sa_family == AF_INET6) {
struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
in->sin6_addr = in6addr_any;
@@ -404,10 +416,8 @@ out:
}
int32_t
-gf_rdma_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
+gf_rdma_client_bind (rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, struct rdma_cm_id *cm_id)
{
int ret = 0;
@@ -419,22 +429,24 @@ gf_rdma_client_bind (rpc_transport_t *this,
*sockaddr_len = sizeof (struct sockaddr_in);
case AF_INET6:
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
+ ret = af_inet_bind_to_port_lt_ceiling (cm_id, sockaddr,
*sockaddr_len,
- CLIENT_PORT_CEILING);
+ GF_CLIENT_PORT_CEILING);
if (ret == -1) {
gf_log (this->name, GF_LOG_WARNING,
- "cannot bind inet socket (%d) to port "
- "less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
+ "cannot bind rdma_cm_id to port "
+ "less than %d (%s)", GF_CLIENT_PORT_CEILING,
+ strerror (errno));
ret = 0;
}
break;
case AF_UNIX:
*sockaddr_len = sizeof (struct sockaddr_un);
+#if 0
ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
*sockaddr_len, sock);
+#endif
break;
default:
@@ -521,21 +533,19 @@ gf_rdma_server_get_local_sockaddr (rpc_transport_t *this,
addr->sa_family = AF_INET_SDP;
} else if (!strcasecmp (address_family, "unix")) {
addr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet/inet6")
- || !strcasecmp (address_family, "inet6/inet")) {
- addr->sa_family = AF_UNSPEC;
} else {
gf_log (this->name, GF_LOG_ERROR,
"unknown address family (%s) specified",
address_family);
+ addr->sa_family = AF_UNSPEC;
ret = -1;
goto err;
}
} else {
gf_log (this->name, GF_LOG_DEBUG,
"option address-family not specified, defaulting "
- "to inet/inet6");
- addr->sa_family = AF_UNSPEC;
+ "to inet");
+ addr->sa_family = AF_INET;
}
switch (addr->sa_family)
diff --git a/rpc/rpc-transport/rdma/src/name.h b/rpc/rpc-transport/rdma/src/name.h
index 114ed1661..742fc5fc3 100644
--- a/rpc/rpc-transport/rdma/src/name.h
+++ b/rpc/rpc-transport/rdma/src/name.h
@@ -11,16 +11,13 @@
#ifndef _IB_VERBS_NAME_H
#define _IB_VERBS_NAME_H
-#include <sys/socket.h>
-#include <sys/un.h>
+#include <rdma/rdma_cma.h>
#include "compat.h"
int32_t
-gf_rdma_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
+gf_rdma_client_bind (rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, struct rdma_cm_id *cm_id);
int32_t
gf_rdma_client_get_remote_sockaddr (rpc_transport_t *this,
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
index 7fce1f15c..6e6099a98 100644
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ b/rpc/rpc-transport/rdma/src/rdma.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -21,6 +20,7 @@
#include "name.h"
#include "byte-order.h"
#include "xlator.h"
+#include "xdr-rpc.h"
#include <signal.h>
#define GF_RDMA_LOG_NAME "rpc-transport/rdma"
@@ -34,99 +34,29 @@ gf_rdma_post_ref (gf_rdma_post_t *post);
int
gf_rdma_post_unref (gf_rdma_post_t *post);
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static uint16_t
-gf_rdma_get_local_lid (struct ibv_context *context,
- int32_t port)
-{
- struct ibv_port_attr attr;
+static void *
+gf_rdma_send_completion_proc (void *data);
- if (ibv_query_port (context, port, &attr))
- return 0;
+static void *
+gf_rdma_recv_completion_proc (void *data);
- return attr.lid;
-}
-
-static const char *
-get_port_state_str(enum ibv_port_state pstate)
-{
- switch (pstate) {
- case IBV_PORT_DOWN: return "PORT_DOWN";
- case IBV_PORT_INIT: return "PORT_INIT";
- case IBV_PORT_ARMED: return "PORT_ARMED";
- case IBV_PORT_ACTIVE: return "PORT_ACTIVE";
- case IBV_PORT_ACTIVE_DEFER: return "PORT_ACTIVE_DEFER";
- default: return "invalid state";
- }
-}
+void *
+gf_rdma_async_event_thread (void *context);
static int32_t
-ib_check_active_port (struct ibv_context *ctx, uint8_t port)
-{
- struct ibv_port_attr port_attr = {0, };
- int32_t ret = 0;
- const char *state_str = NULL;
-
- if (!ctx) {
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
-
- ret = ibv_query_port (ctx, port, &port_attr);
-
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to query port %u properties", port);
- return -1;
- }
-
- state_str = get_port_state_str (port_attr.state);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Infiniband PORT: (%u) STATE: (%s)",
- port, state_str);
-
- if (port_attr.state == IBV_PORT_ACTIVE)
- return 0;
-
- return -1;
-}
+gf_rdma_create_qp (rpc_transport_t *this);
static int32_t
-ib_get_active_port (struct ibv_context *ib_ctx)
-{
- struct ibv_device_attr ib_device_attr = {{0, }, };
- int32_t ret = -1;
- uint8_t ib_port = 0;
+__gf_rdma_teardown (rpc_transport_t *this);
- if (!ib_ctx) {
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
- if (ibv_query_device (ib_ctx, &ib_device_attr)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to query device properties");
- return -1;
- }
+static int32_t
+gf_rdma_teardown (rpc_transport_t *this);
- for (ib_port = 1; ib_port <= ib_device_attr.phys_port_cnt; ++ib_port) {
- ret = ib_check_active_port (ib_ctx, ib_port);
- if (ret == 0)
- return ib_port;
+static int32_t
+gf_rdma_disconnect (rpc_transport_t *this);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Port:(%u) not active", ib_port);
- continue;
- }
- return ret;
-}
+static void
+gf_rdma_cm_handle_disconnect (rpc_transport_t *this);
static void
@@ -156,7 +86,7 @@ gf_rdma_put_post (gf_rdma_queue_t *queue, gf_rdma_post_t *post)
static gf_rdma_post_t *
-gf_rdma_new_post (gf_rdma_device_t *device, int32_t len,
+gf_rdma_new_post (rpc_transport_t *this, gf_rdma_device_t *device, int32_t len,
gf_rdma_post_type_t type)
{
gf_rdma_post_t *post = NULL;
@@ -183,7 +113,7 @@ gf_rdma_new_post (gf_rdma_device_t *device, int32_t len,
post->buf_size,
IBV_ACCESS_LOCAL_WRITE);
if (!post->mr) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ gf_log (this->name, GF_LOG_WARNING,
"memory registration failed (%s)",
strerror (errno));
goto out;
@@ -195,9 +125,7 @@ gf_rdma_new_post (gf_rdma_device_t *device, int32_t len,
ret = 0;
out:
if (ret != 0) {
- if (post->buf != NULL) {
- free (post->buf);
- }
+ free (post->buf);
GF_FREE (post);
post = NULL;
@@ -260,22 +188,6 @@ __gf_rdma_quota_get (gf_rdma_peer_t *peer)
return ret;
}
-/*
- static int32_t
- gf_rdma_quota_get (gf_rdma_peer_t *peer)
- {
- int32_t ret = -1;
- gf_rdma_private_t *priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __gf_rdma_quota_get (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
- }
-*/
static void
__gf_rdma_ioq_entry_free (gf_rdma_ioq_t *entry)
@@ -291,6 +203,7 @@ __gf_rdma_ioq_entry_free (gf_rdma_ioq_t *entry)
iobref_unref (entry->msg.request.rsp_iobref);
entry->msg.request.rsp_iobref = NULL;
}
+
mem_put (entry);
}
@@ -310,26 +223,898 @@ static int32_t
__gf_rdma_disconnect (rpc_transport_t *this)
{
gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
priv = this->private;
- if (priv->connected || priv->tcp_connected) {
- fcntl (priv->sock, F_SETFL, O_NONBLOCK);
- if (shutdown (priv->sock, SHUT_RDWR) != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "shutdown () - error: %s",
- strerror (errno));
- ret = -errno;
- priv->tcp_connected = 0;
+ if (priv->connected) {
+ rdma_disconnect (priv->peer.cm_id);
+ }
+
+ return 0;
+}
+
+
+/*
+ * Prepare a post queue for use: initialise its lock and make both the
+ * active and the passive post lists empty, i.e. each list head points back
+ * at itself.
+ */
+static void
+gf_rdma_queue_init (gf_rdma_queue_t *queue)
+{
+        gf_rdma_post_t *active_head  = &queue->active_posts;
+        gf_rdma_post_t *passive_head = &queue->passive_posts;
+
+        pthread_mutex_init (&queue->lock, NULL);
+
+        active_head->next  = active_head;
+        active_head->prev  = active_head;
+        passive_head->next = passive_head;
+        passive_head->prev = passive_head;
+}
+
+
+/*
+ * Unlink and destroy every post on the circular list headed by @post.
+ * @post itself is only the list head: it is never destroyed, and its next
+ * pointer refers back to itself once the loop finishes.
+ */
+static void
+__gf_rdma_destroy_queue (gf_rdma_post_t *post)
+{
+        gf_rdma_post_t *victim = NULL;
+
+        for (victim = post->next; victim != post; victim = post->next) {
+                /* detach the first element before handing it over */
+                post->next = victim->next;
+                victim->next->prev = post;
+
+                gf_rdma_destroy_post (victim);
+        }
+}
+
+
+/*
+ * Destroy all posts held by @queue (passive list first, then active list)
+ * while holding the queue lock, and reset the matching counters.
+ * A NULL @queue is ignored.
+ */
+static void
+gf_rdma_destroy_queue (gf_rdma_queue_t *queue)
+{
+        if (queue == NULL)
+                return;
+
+        pthread_mutex_lock (&queue->lock);
+        {
+                /* a list is only walked when its counter says it is
+                 * populated */
+                if (queue->passive_count > 0) {
+                        __gf_rdma_destroy_queue (&queue->passive_posts);
+                        queue->passive_count = 0;
+                }
+
+                if (queue->active_count > 0) {
+                        __gf_rdma_destroy_queue (&queue->active_posts);
+                        queue->active_count = 0;
+                }
+        }
+        pthread_mutex_unlock (&queue->lock);
+}
+
+
+/*
+ * Destroy the send and the receive post queues of the device attached to
+ * transport @this.  A NULL @this is ignored.
+ */
+static void
+gf_rdma_destroy_posts (rpc_transport_t *this)
+{
+        gf_rdma_private_t *priv   = NULL;
+        gf_rdma_device_t  *device = NULL;
+
+        if (this == NULL)
+                return;
+
+        priv   = this->private;
+        device = priv->device;
+
+        gf_rdma_destroy_queue (&device->sendq);
+        gf_rdma_destroy_queue (&device->recvq);
+}
+
+
+/*
+ * Allocate @count posts of kind @type for the device of transport @this and
+ * put each of them on queue @q.  Every post buffer is @size + 2048 bytes.
+ *
+ * Returns 0 when all posts were created, -1 as soon as one allocation
+ * fails (posts created before the failure stay on @q).
+ */
+static int32_t
+__gf_rdma_create_posts (rpc_transport_t *this, int32_t count, int32_t size,
+                        gf_rdma_queue_t *q, gf_rdma_post_type_t type)
+{
+        gf_rdma_private_t *priv    = this->private;
+        gf_rdma_device_t  *device  = priv->device;
+        gf_rdma_post_t    *post    = NULL;
+        int32_t            created = 0;
+
+        while (created < count) {
+                post = gf_rdma_new_post (this, device, size + 2048, type);
+                if (post == NULL) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "post creation failed");
+                        return -1;
+                }
+
+                gf_rdma_put_post (q, post);
+                created++;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Hand @post to the shared receive queue @srq as a single-SGE receive work
+ * request covering the whole post buffer.  The post pointer doubles as the
+ * work-request id.
+ *
+ * A reference is taken on @post before it is posted; it is not dropped
+ * here if posting fails.  Returns whatever ibv_post_srq_recv() returns.
+ */
+static int32_t
+gf_rdma_post_recv (struct ibv_srq *srq,
+                   gf_rdma_post_t *post)
+{
+        struct ibv_sge      sge    = {0, };
+        struct ibv_recv_wr  wr     = {0, };
+        struct ibv_recv_wr *bad_wr = NULL;
+
+        sge.addr   = (unsigned long) post->buf;
+        sge.length = post->buf_size;
+        sge.lkey   = post->mr->lkey;
+
+        wr.wr_id   = (unsigned long) post;
+        wr.sg_list = &sge;
+        wr.num_sge = 1;
+
+        gf_rdma_post_ref (post);
+
+        return ibv_post_srq_recv (srq, &wr, &bad_wr);
+}
+
+
+/*
+ * Create the send and receive posts for the device of transport @this
+ * (counts and sizes come from the transport options) and pre-post
+ * recv_count receive buffers to the device's shared receive queue.
+ *
+ * Returns 0 on success.  On any failure all posts of the device are
+ * destroyed again and -1 is returned.
+ */
+static int32_t
+gf_rdma_create_posts (rpc_transport_t *this)
+{
+        int32_t            i       = 0, ret = 0;
+        gf_rdma_post_t    *post    = NULL;
+        gf_rdma_private_t *priv    = NULL;
+        gf_rdma_options_t *options = NULL;
+        gf_rdma_device_t  *device  = NULL;
+
+        priv = this->private;
+        options = &priv->options;
+        device = priv->device;
+
+        ret = __gf_rdma_create_posts (this, options->send_count,
+                                      options->send_size,
+                                      &device->sendq, GF_RDMA_SEND_POST);
+        if (!ret)
+                ret = __gf_rdma_create_posts (this, options->recv_count,
+                                              options->recv_size,
+                                              &device->recvq,
+                                              GF_RDMA_RECV_POST);
+
+        if (!ret) {
+                for (i = 0; i < options->recv_count; i++) {
+                        post = gf_rdma_get_post (&device->recvq);
+                        if (post == NULL) {
+                                /* gf_rdma_post_recv() dereferences the
+                                 * post, so an empty queue must be treated
+                                 * as a failure rather than passed on */
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "no post available on the receive "
+                                        "queue");
+                                ret = -1;
+                                break;
+                        }
+
+                        if (gf_rdma_post_recv (device->srq, post) != 0) {
+                                ret = -1;
+                                break;
+                        }
+                }
+        }
+
+        if (ret)
+                gf_rdma_destroy_posts (this);
+
+        return ret;
+}
+
+
+/*
+ * Destroy the receive and send completion queues of the device attached to
+ * transport @this, if they exist, and clear the stored pointers so the
+ * function can safely be called more than once.
+ */
+static void
+gf_rdma_destroy_cq (rpc_transport_t *this)
+{
+        gf_rdma_private_t *priv   = this->private;
+        gf_rdma_device_t  *device = priv->device;
+
+        if (device->recv_cq != NULL) {
+                ibv_destroy_cq (device->recv_cq);
+                device->recv_cq = NULL;
+        }
+
+        if (device->send_cq != NULL) {
+                ibv_destroy_cq (device->send_cq);
+                device->send_cq = NULL;
+        }
+}
+
+
+/*
+ * Create the receive and send completion queues for the device attached to
+ * transport @this and request completion notification on both.
+ *
+ * The receive CQ holds recv_count * 2 entries.  The send CQ holds
+ * send_count * 128 entries, capped at the max_cqe value reported by
+ * ibv_query_device().
+ *
+ * Returns 0 on success, -1 on failure; on failure any CQ created so far is
+ * destroyed again.
+ */
+static int32_t
+gf_rdma_create_cq (rpc_transport_t *this)
+{
+        gf_rdma_private_t      *priv        = NULL;
+        gf_rdma_options_t      *options     = NULL;
+        gf_rdma_device_t       *device      = NULL;
+        uint64_t                send_cqe    = 0;
+        int32_t                 ret         = 0;
+        struct ibv_device_attr  device_attr = {{0}, };
+
+        priv = this->private;
+        options = &priv->options;
+        device = priv->device;
+
+        device->recv_cq = ibv_create_cq (priv->device->context,
+                                         options->recv_count * 2,
+                                         device,
+                                         device->recv_chan,
+                                         0);
+        if (!device->recv_cq) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "creation of CQ for device %s failed",
+                        device->device_name);
+                ret = -1;
+                goto out;
+        }
+
+        if (ibv_req_notify_cq (device->recv_cq, 0)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "ibv_req_notify_cq on recv CQ of device %s failed",
+                        device->device_name);
+                ret = -1;
+                goto out;
+        }
+
+        ret = ibv_query_device (priv->device->context, &device_attr);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "ibv_query_device on %s returned %d (%s)",
+                        priv->device->device_name, ret,
+                        (ret > 0) ? strerror (ret) : "");
+                ret = -1;
+                goto out;
+        }
+
+        /* widen before multiplying so that a large send_count cannot
+         * overflow the narrower operand type */
+        send_cqe = (int64_t) options->send_count * 128;
+        send_cqe = (send_cqe > device_attr.max_cqe)
+                ? device_attr.max_cqe : send_cqe;
+
+        /* TODO: make send_cq size dynamically adaptive */
+        device->send_cq = ibv_create_cq (priv->device->context,
+                                         send_cqe, device,
+                                         device->send_chan, 0);
+        if (!device->send_cq) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "creation of send_cq for device %s failed",
+                        device->device_name);
+                ret = -1;
+                goto out;
+        }
+
+        if (ibv_req_notify_cq (device->send_cq, 0)) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "ibv_req_notify_cq on send_cq for device %s"
+                        " failed", device->device_name);
+                ret = -1;
+                goto out;
+        }
+
+out:
+        if (ret != 0)
+                gf_rdma_destroy_cq (this);
+
+        return ret;
+}
+
+
+/*
+ * Return the per-process device object for the verbs device called
+ * @device_name, creating it on first use.
+ *
+ * Devices already created are kept on the list rooted at this->ctx->ib and
+ * are looked up by name.  When none matches, a new one is built on top of
+ * @ibctx: memory pools, send/recv completion channels, completion queues,
+ * protection domain, shared receive queue, posts, the two completion
+ * threads, the async-event thread and the QP registry.
+ *
+ * While a new device is being built priv->device of @this points at it,
+ * because the CQ and post helpers reach the device through the transport.
+ *
+ * Returns the device, or NULL on failure.  On failure the half-built
+ * device is never left on the device list and priv->device is reset to
+ * NULL.
+ */
+static gf_rdma_device_t *
+gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx,
+                    char *device_name)
+{
+        glusterfs_ctx_t          *ctx             = NULL;
+        gf_rdma_private_t        *priv            = NULL;
+        gf_rdma_options_t        *options         = NULL;
+        int32_t                   ret             = 0;
+        int32_t                   i               = 0;
+        int                       threads_running = 0;
+        gf_rdma_device_t         *trav            = NULL, *device = NULL;
+        gf_rdma_device_t        **link            = NULL;
+        gf_rdma_ctx_t            *rdma_ctx        = NULL;
+        struct ibv_srq_init_attr  srq_attr        = {0, };
+
+        priv = this->private;
+        options = &priv->options;
+        ctx = this->ctx;
+        rdma_ctx = ctx->ib;
+
+        trav = rdma_ctx->device;
+
+        while (trav) {
+                if (!strcmp (trav->device_name, device_name))
+                        break;
+                trav = trav->next;
+        }
+
+        if (!trav) {
+                trav = GF_CALLOC (1, sizeof (*trav),
+                                  gf_common_mt_rdma_device_t);
+                if (trav == NULL) {
+                        goto out;
+                }
+
+                priv->device = trav;
+                trav->context = ibctx;
+
+                trav->request_ctx_pool
+                        = mem_pool_new (gf_rdma_request_context_t,
+                                        GF_RDMA_POOL_SIZE);
+                if (trav->request_ctx_pool == NULL) {
+                        goto out;
+                }
+
+                trav->ioq_pool
+                        = mem_pool_new (gf_rdma_ioq_t, GF_RDMA_POOL_SIZE);
+                if (trav->ioq_pool == NULL) {
+                        goto out;
+                }
+
+                trav->reply_info_pool = mem_pool_new (gf_rdma_reply_info_t,
+                                                      GF_RDMA_POOL_SIZE);
+                if (trav->reply_info_pool == NULL) {
+                        goto out;
+                }
+
+                trav->device_name = gf_strdup (device_name);
+                if (trav->device_name == NULL) {
+                        /* the lookup loop above strcmp()s this field, so a
+                         * nameless device must never reach the list */
+                        goto out;
+                }
+
+                trav->next = rdma_ctx->device;
+                rdma_ctx->device = trav;
+
+                trav->send_chan = ibv_create_comp_channel (trav->context);
+                if (!trav->send_chan) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create send completion channel for "
+                                "device (%s)", device_name);
+                        goto out;
+                }
+
+                trav->recv_chan = ibv_create_comp_channel (trav->context);
+                if (!trav->recv_chan) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create recv completion channel for "
+                                "device (%s)", device_name);
+                        goto out;
+                }
+
+                if (gf_rdma_create_cq (this) < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create CQ for device (%s)",
+                                device_name);
+                        goto out;
+                }
+
+                /* protection domain */
+                trav->pd = ibv_alloc_pd (trav->context);
+
+                if (!trav->pd) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not allocate protection domain for "
+                                "device (%s)", device_name);
+                        goto out;
+                }
+
+                srq_attr.attr.max_wr    = options->recv_count;
+                srq_attr.attr.max_sge   = 1;
+                srq_attr.attr.srq_limit = 10;
+
+                trav->srq = ibv_create_srq (trav->pd, &srq_attr);
+
+                if (!trav->srq) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create SRQ for device (%s)",
+                                device_name);
+                        goto out;
+                }
+
+                /* queue init */
+                gf_rdma_queue_init (&trav->sendq);
+                gf_rdma_queue_init (&trav->recvq);
+
+                if (gf_rdma_create_posts (this) < 0) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not allocate posts for device (%s)",
+                                device_name);
+                        goto out;
+                }
+
+                /* completion threads */
+                ret = gf_thread_create (&trav->send_thread, NULL,
+                                        gf_rdma_send_completion_proc,
+                                        trav->send_chan);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create send completion thread for "
+                                "device (%s)", device_name);
+                        goto out;
+                }
+
+                /* From here on at least one thread is using the device's
+                 * channels, so a later failure must not tear them down. */
+                threads_running = 1;
+
+                ret = gf_thread_create (&trav->recv_thread, NULL,
+                                        gf_rdma_recv_completion_proc,
+                                        trav->recv_chan);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create recv completion thread "
+                                "for device (%s)", device_name);
+                        goto out;
+                }
+
+                ret = gf_thread_create (&trav->async_event_thread, NULL,
+                                        gf_rdma_async_event_thread,
+                                        ibctx);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "could not create async_event_thread");
+                        goto out;
+                }
+
+                /* qpreg */
+                pthread_mutex_init (&trav->qpreg.lock, NULL);
+                /* NOTE(review): 42 is presumably the number of buckets in
+                 * qpreg.ents -- confirm against the declaration */
+                for (i=0; i<42; i++) {
+                        trav->qpreg.ents[i].next = &trav->qpreg.ents[i];
+                        trav->qpreg.ents[i].prev = &trav->qpreg.ents[i];
+                }
+        }
+
+        device = trav;
+        trav = NULL;
+out:
+
+        if (trav != NULL) {
+                /* a device that failed to come up must not be found by a
+                 * later lookup, whether or not its memory is released */
+                for (link = &rdma_ctx->device; *link != NULL;
+                     link = &(*link)->next) {
+                        if (*link == trav) {
+                                *link = trav->next;
+                                break;
+                        }
+                }
+
+                if (!threads_running) {
+                        /* the SRQ is created from the PD, so it is
+                         * released before the PD is deallocated */
+                        if (trav->srq != NULL)
+                                ibv_destroy_srq (trav->srq);
+
+                        gf_rdma_destroy_posts (this);
+                        mem_pool_destroy (trav->ioq_pool);
+                        mem_pool_destroy (trav->request_ctx_pool);
+                        mem_pool_destroy (trav->reply_info_pool);
+
+                        if (trav->pd != NULL)
+                                ibv_dealloc_pd (trav->pd);
+
+                        gf_rdma_destroy_cq (this);
+
+                        if (trav->recv_chan != NULL)
+                                ibv_destroy_comp_channel (trav->recv_chan);
+
+                        if (trav->send_chan != NULL)
+                                ibv_destroy_comp_channel (trav->send_chan);
+
+                        GF_FREE ((char *)trav->device_name);
+                        GF_FREE (trav);
+                }
+                /* else: completion threads already started still hold the
+                 * device's channels; its resources are deliberately left
+                 * allocated instead of being freed under them. */
+
+                priv->device = NULL;
+        }
+
+        return device;
+}
+
+
+/*
+ * Build the server-side transport for a connection request that arrived on
+ * @listener, bound to the connection manager id @cm_id.
+ *
+ * The new transport copies the listener's options, ops, init/fini,
+ * context, notify callback and mydata, gets its own copy of the
+ * listener's name, records the local and peer socket addresses from
+ * @cm_id, and is attached to the device that @cm_id was received on.
+ * @cm_id->context is pointed at the new transport.
+ *
+ * Returns a referenced transport, or NULL on failure (everything allocated
+ * here is released again in that case).
+ */
+static rpc_transport_t *
+gf_rdma_transport_new (rpc_transport_t *listener, struct rdma_cm_id *cm_id)
+{
+        gf_rdma_private_t *listener_priv = NULL, *priv = NULL;
+        rpc_transport_t   *this          = NULL, *new = NULL;
+        gf_rdma_options_t *options       = NULL;
+        char              *device_name   = NULL;
+
+        listener_priv = listener->private;
+
+        this = GF_CALLOC (1, sizeof (rpc_transport_t),
+                          gf_common_mt_rpc_transport_t);
+        if (this == NULL) {
+                goto out;
+        }
+
+        this->listener = listener;
+
+        priv = GF_CALLOC (1, sizeof (gf_rdma_private_t),
+                          gf_common_mt_rdma_private_t);
+        if (priv == NULL) {
+                goto out;
+        }
+
+        this->private = priv;
+        priv->options = listener_priv->options;
+
+        priv->listener = listener;
+        priv->entity = GF_RDMA_SERVER;
+
+        options = &priv->options;
+
+        this->ops = listener->ops;
+        this->init = listener->init;
+        this->fini = listener->fini;
+        this->ctx = listener->ctx;
+        this->notify = listener->notify;
+        this->mydata = listener->mydata;
+
+        this->name = gf_strdup (listener->name);
+        if (this->name == NULL) {
+                /* the name is handed to the logging calls made on this
+                 * transport, so do not continue without it */
+                goto out;
+        }
+
+        /* NOTE(review): the length is the size of the src_addr/dst_addr
+         * members -- confirm this is wide enough for IPv6 peers with the
+         * librdmacm headers in use */
+        this->myinfo.sockaddr_len = sizeof (cm_id->route.addr.src_addr);
+        memcpy (&this->myinfo.sockaddr, &cm_id->route.addr.src_addr,
+                this->myinfo.sockaddr_len);
+
+        this->peerinfo.sockaddr_len = sizeof (cm_id->route.addr.dst_addr);
+        memcpy (&this->peerinfo.sockaddr, &cm_id->route.addr.dst_addr,
+                this->peerinfo.sockaddr_len);
+
+        priv->peer.trans = this;
+        gf_rdma_get_transport_identifiers (this);
+
+        device_name = (char *)ibv_get_device_name (cm_id->verbs->device);
+        if (device_name == NULL) {
+                gf_log (listener->name, GF_LOG_WARNING,
+                        "cannot get device name (peer:%s me:%s)",
+                        this->peerinfo.identifier, this->myinfo.identifier);
+                goto out;
+        }
+
+        priv->device = gf_rdma_get_device (this, cm_id->verbs,
+                                           device_name);
+        if (priv->device == NULL) {
+                gf_log (listener->name, GF_LOG_WARNING,
+                        "cannot get infiniband device %s (peer:%s me:%s)",
+                        device_name, this->peerinfo.identifier,
+                        this->myinfo.identifier);
+                goto out;
+        }
+
+        priv->peer.send_count = options->send_count;
+        priv->peer.recv_count = options->recv_count;
+        priv->peer.send_size = options->send_size;
+        priv->peer.recv_size = options->recv_size;
+        priv->peer.cm_id = cm_id;
+        INIT_LIST_HEAD (&priv->peer.ioq);
+
+        pthread_mutex_init (&priv->write_mutex, NULL);
+        pthread_mutex_init (&priv->recv_mutex, NULL);
+
+        cm_id->context = this;
+
+        new = rpc_transport_ref (this);
+        this = NULL;
+out:
+        /* 'this' is still set only when something above failed */
+        if (this != NULL) {
+                if (this->private != NULL) {
+                        GF_FREE (this->private);
+                }
+
+                if (this->name != NULL) {
+                        GF_FREE (this->name);
+                }
+
+                GF_FREE (this);
+        }
+
+        return new;
+}
+
+
+/*
+ * Handle RDMA_CM_EVENT_CONNECT_REQUEST: create a transport for the incoming
+ * connection, create its queue pair and accept the connection, using the
+ * retry settings from the listener's options.
+ *
+ * Returns 0 on success and a negative value on failure.  If no transport
+ * could be created the new cm id is destroyed; later failures go through
+ * the normal disconnect handling of the new transport.
+ */
+static int
+gf_rdma_cm_handle_connect_request (struct rdma_cm_event *event)
+{
+        struct rdma_cm_id      *child_cm_id    = event->id;
+        struct rdma_cm_id      *listener_cm_id = event->listen_id;
+        rpc_transport_t        *listener       = listener_cm_id->context;
+        gf_rdma_private_t      *priv           = listener->private;
+        gf_rdma_options_t      *options        = &priv->options;
+        struct rdma_conn_param  conn_param     = {0, };
+        rpc_transport_t        *this           = NULL;
+        int                     ret            = -1;
+
+        this = gf_rdma_transport_new (listener, child_cm_id);
+        if (this == NULL) {
+                gf_log (listener->name, GF_LOG_WARNING,
+                        "could not create a transport for incoming connection"
+                        " (me.name:%s me.identifier:%s)", listener->name,
+                        listener->myinfo.identifier);
+                rdma_destroy_id (child_cm_id);
+                return ret;
+        }
+
+        gf_log (listener->name, GF_LOG_TRACE,
+                "got a connect request (me:%s peer:%s)",
+                listener->myinfo.identifier, this->peerinfo.identifier);
+
+        ret = gf_rdma_create_qp (this);
+        if (ret < 0) {
+                gf_log (listener->name, GF_LOG_WARNING,
+                        "could not create QP (peer:%s me:%s)",
+                        this->peerinfo.identifier, this->myinfo.identifier);
+                gf_rdma_cm_handle_disconnect (this);
+                return ret;
+        }
+
+        conn_param.responder_resources = 1;
+        conn_param.initiator_depth = 1;
+        conn_param.retry_count = options->attr_retry_cnt;
+        conn_param.rnr_retry_count = options->attr_rnr_retry;
+
+        ret = rdma_accept(child_cm_id, &conn_param);
+        if (ret < 0) {
+                gf_log (listener->name, GF_LOG_WARNING, "rdma_accept failed "
+                        "peer:%s me:%s (%s)", this->peerinfo.identifier,
+                        this->myinfo.identifier, strerror (errno));
+                gf_rdma_cm_handle_disconnect (this);
+                return ret;
+        }
+
+        return 0;
+}
+
+
+/*
+ * Handle RDMA_CM_EVENT_ROUTE_RESOLVED on the transport stored in the
+ * event's cm id: create the queue pair and start connecting to the peer,
+ * using the retry settings from the transport options.
+ *
+ * Returns 0 on success (and for a NULL @event), otherwise the non-zero
+ * result of the failing step; in that case the transport has already been
+ * run through the disconnect handling.
+ */
+static int
+gf_rdma_cm_handle_route_resolved (struct rdma_cm_event *event)
+{
+        struct rdma_conn_param  conn_param = {0, };
+        rpc_transport_t        *this       = NULL;
+        gf_rdma_private_t      *priv       = NULL;
+        gf_rdma_options_t      *options    = NULL;
+        int                     ret        = 0;
+
+        if (event == NULL)
+                return 0;
+
+        this    = event->id->context;
+        priv    = this->private;
+        options = &priv->options;
+
+        ret = gf_rdma_create_qp (this);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "could not create QP (peer:%s me:%s)",
+                        this->peerinfo.identifier, this->myinfo.identifier);
+                gf_rdma_cm_handle_disconnect (this);
+                return ret;
+        }
+
+        memset (&conn_param, 0, sizeof (conn_param));
+        conn_param.responder_resources = 1;
+        conn_param.initiator_depth = 1;
+        conn_param.retry_count = options->attr_retry_cnt;
+        conn_param.rnr_retry_count = options->attr_rnr_retry;
+
+        ret = rdma_connect (priv->peer.cm_id, &conn_param);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "rdma_connect failed (%s)", strerror (errno));
+                gf_rdma_cm_handle_disconnect (this);
+                return ret;
+        }
+
+        gf_log (this->name, GF_LOG_TRACE, "route resolved (me:%s peer:%s)",
+                this->myinfo.identifier, this->peerinfo.identifier);
+
+        return 0;
+}
+
+
+/*
+ * Handle RDMA_CM_EVENT_ADDR_RESOLVED on the transport stored in the event's
+ * cm id: record the now-known local and peer socket addresses, refresh the
+ * transport identifiers and start route resolution (2000 is the timeout
+ * passed to rdma_resolve_route()).
+ *
+ * Returns the result of rdma_resolve_route().  On failure the transport is
+ * run through the disconnect handling and must not be used afterwards.
+ */
+static int
+gf_rdma_cm_handle_addr_resolved (struct rdma_cm_event *event)
+{
+        rpc_transport_t   *this = NULL;
+        gf_rdma_peer_t    *peer = NULL;
+        gf_rdma_private_t *priv = NULL;
+        int                ret  = 0;
+
+        this = event->id->context;
+
+        priv = this->private;
+        peer = &priv->peer;
+
+        GF_ASSERT (peer->cm_id == event->id);
+
+        this->myinfo.sockaddr_len = sizeof (peer->cm_id->route.addr.src_addr);
+        memcpy (&this->myinfo.sockaddr, &peer->cm_id->route.addr.src_addr,
+                this->myinfo.sockaddr_len);
+
+        this->peerinfo.sockaddr_len = sizeof (peer->cm_id->route.addr.dst_addr);
+        memcpy (&this->peerinfo.sockaddr, &peer->cm_id->route.addr.dst_addr,
+                this->peerinfo.sockaddr_len);
+
+        gf_rdma_get_transport_identifiers (this);
+
+        ret = rdma_resolve_route(peer->cm_id, 2000);
+        if (ret != 0) {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "rdma_resolve_route failed (me:%s peer:%s) (%s)",
+                        this->myinfo.identifier, this->peerinfo.identifier,
+                        strerror (errno));
+                gf_rdma_cm_handle_disconnect (this);
+
+                /* the disconnect handling may drop a reference on the
+                 * transport, so 'this' must not be touched any more */
+                goto out;
+        }
+
+        gf_log (this->name, GF_LOG_TRACE, "Address resolved (me:%s peer:%s)",
+                this->myinfo.identifier, this->peerinfo.identifier);
+
+out:
+        return ret;
+}
+
+
+/*
+ * Clean up transport @this after its connection went away.
+ *
+ * Under the write mutex the connected flag is cleared (only when a cm id
+ * is attached) and the transport is torn down.  Afterwards, outside the
+ * lock, a DISCONNECT notification is sent if the transport had been
+ * connected, and one reference is dropped if a cm id was attached.
+ */
+static void
+gf_rdma_cm_handle_disconnect (rpc_transport_t *this)
+{
+        gf_rdma_private_t *priv          = this->private;
+        char               drop_ref      = 0;
+        char               was_connected = 0;
+
+        gf_log (this->name, GF_LOG_DEBUG,
+                "peer disconnected, cleaning up");
+
+        pthread_mutex_lock (&priv->write_mutex);
+        {
+                if (priv->peer.cm_id != NULL) {
+                        drop_ref = 1;
+                        was_connected = priv->connected;
                         priv->connected = 0;
                 }
+
+                __gf_rdma_teardown (this);
+        }
+        pthread_mutex_unlock (&priv->write_mutex);
+
+        /* notification and unref happen without the mutex held */
+        if (was_connected)
+                rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
+
+        if (drop_ref)
+                rpc_transport_unref (this);
+}
+
+
+/*
+ * Handle RDMA_CM_EVENT_ESTABLISHED on the transport stored in the event's
+ * cm id: mark it connected, reset the peer quota to 1 with quota_set
+ * cleared, and notify the upper layer -- CONNECT on the transport itself
+ * for a client, ACCEPT on the listener for a server-side transport.
+ *
+ * If the notification returns a negative value the transport is
+ * disconnected.  Returns the notification result (0 when the entity is
+ * neither client nor server).
+ */
+static int
+gf_rdma_cm_handle_event_established (struct rdma_cm_event *event)
+{
+        struct rdma_cm_id *cm_id = event->id;
+        rpc_transport_t   *this  = cm_id->context;
+        gf_rdma_private_t *priv  = this->private;
+        int                ret   = 0;
+
+        priv->connected = 1;
+
+        pthread_mutex_lock (&priv->write_mutex);
+        {
+                priv->peer.quota = 1;
+                priv->peer.quota_set = 0;
+        }
+        pthread_mutex_unlock (&priv->write_mutex);
+
+        switch (priv->entity) {
+        case GF_RDMA_CLIENT:
+                ret = rpc_transport_notify (this, RPC_TRANSPORT_CONNECT, this);
+                break;
+
+        case GF_RDMA_SERVER:
+                ret = rpc_transport_notify (priv->listener,
+                                            RPC_TRANSPORT_ACCEPT, this);
+                break;
+
+        default:
+                break;
         }
+        if (ret < 0)
+                gf_rdma_disconnect (this);
+
+        gf_log (this->name, GF_LOG_TRACE,
+                "recieved event RDMA_CM_EVENT_ESTABLISHED (me:%s peer:%s)",
+                this->myinfo.identifier, this->peerinfo.identifier);
+
         return ret;
 }
+/*
+ * Handle a connection-manager error event on transport @this.  Every
+ * transport except a listening one is run through the disconnect handling;
+ * a listener is left untouched.  Always returns 0.
+ */
+static int
+gf_rdma_cm_handle_event_error (rpc_transport_t *this)
+{
+        gf_rdma_private_t *priv = this->private;
+
+        if (priv->entity == GF_RDMA_SERVER_LISTENER)
+                return 0;
+
+        gf_rdma_cm_handle_disconnect (this);
+
+        return 0;
+}
+
+
+/*
+ * Handle RDMA_CM_EVENT_DEVICE_REMOVAL.  Currently a stub: the event is
+ * ignored and 0 is returned.
+ */
+static int
+gf_rdma_cm_handle_device_removal (struct rdma_cm_event *event)
+{
+        (void) event;
+
+        return 0;
+}
+
+
+/*
+ * Thread body that dispatches connection-manager events.
+ *
+ * @data is the rdma_event_channel to read from.  The loop blocks in
+ * rdma_get_cm_event(), hands each event to the matching handler and
+ * acknowledges it.  It ends, returning NULL, when rdma_get_cm_event()
+ * fails.
+ *
+ * Error and disconnect events are acknowledged BEFORE their handler runs
+ * and then skip the common acknowledgement at the bottom of the loop.
+ * NOTE(review): presumably because the disconnect path can end up
+ * destroying the cm id, which waits for outstanding events to be
+ * acknowledged -- confirm against __gf_rdma_teardown().
+ */
+static void *
+gf_rdma_cm_event_handler (void *data)
+{
+        struct rdma_cm_event      *event         = NULL;
+        int                        ret           = 0;
+        rpc_transport_t           *this          = NULL;
+        struct rdma_event_channel *event_channel = NULL;
+
+        event_channel = data;
+
+        while (1) {
+                ret = rdma_get_cm_event (event_channel, &event);
+                if (ret != 0) {
+                        /* name the call that actually failed */
+                        gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+                                "rdma_get_cm_event failed (%s)",
+                                strerror (errno));
+                        break;
+                }
+
+                switch (event->event) {
+                case RDMA_CM_EVENT_ADDR_RESOLVED:
+                        gf_rdma_cm_handle_addr_resolved (event);
+                        break;
+
+                case RDMA_CM_EVENT_ROUTE_RESOLVED:
+                        gf_rdma_cm_handle_route_resolved (event);
+                        break;
+
+                case RDMA_CM_EVENT_CONNECT_REQUEST:
+                        gf_rdma_cm_handle_connect_request (event);
+                        break;
+
+                case RDMA_CM_EVENT_ESTABLISHED:
+                        gf_rdma_cm_handle_event_established (event);
+                        break;
+
+                case RDMA_CM_EVENT_ADDR_ERROR:
+                case RDMA_CM_EVENT_ROUTE_ERROR:
+                case RDMA_CM_EVENT_CONNECT_ERROR:
+                case RDMA_CM_EVENT_UNREACHABLE:
+                case RDMA_CM_EVENT_REJECTED:
+                        this = event->id->context;
+
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "cma event %s, error %d (me:%s peer:%s)\n",
+                                rdma_event_str(event->event), event->status,
+                                this->myinfo.identifier,
+                                this->peerinfo.identifier);
+
+                        /* ack first; the event must not be touched after
+                         * this point */
+                        rdma_ack_cm_event (event);
+                        event = NULL;
+
+                        gf_rdma_cm_handle_event_error (this);
+                        continue;
+
+                case RDMA_CM_EVENT_DISCONNECTED:
+                        this = event->id->context;
+
+                        gf_log (this->name, GF_LOG_DEBUG,
+                                "recieved disconnect (me:%s peer:%s)\n",
+                                this->myinfo.identifier,
+                                this->peerinfo.identifier);
+
+                        /* ack first; the event must not be touched after
+                         * this point */
+                        rdma_ack_cm_event (event);
+                        event = NULL;
+
+                        gf_rdma_cm_handle_disconnect (this);
+                        continue;
+
+                case RDMA_CM_EVENT_DEVICE_REMOVAL:
+                        gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+                                "device removed");
+                        gf_rdma_cm_handle_device_removal (event);
+                        break;
+
+                default:
+                        gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+                                "unhandled event: %s, ignoring",
+                                rdma_event_str(event->event));
+                        break;
+                }
+
+                /* common acknowledgement for every case that used 'break' */
+                rdma_ack_cm_event (event);
+        }
+
+        return NULL;
+}
+
+
static int32_t
gf_rdma_post_send (struct ibv_qp *qp, gf_rdma_post_t *post, int32_t len)
{
@@ -355,33 +1140,33 @@ gf_rdma_post_send (struct ibv_qp *qp, gf_rdma_post_t *post, int32_t len)
int
__gf_rdma_encode_error(gf_rdma_peer_t *peer, gf_rdma_reply_info_t *reply_info,
- struct iovec *rpchdr, uint32_t *ptr,
+ struct iovec *rpchdr, gf_rdma_header_t *hdr,
gf_rdma_errcode_t err)
{
- uint32_t *startp = NULL;
struct rpc_msg *rpc_msg = NULL;
- startp = ptr;
if (reply_info != NULL) {
- *ptr++ = hton32(reply_info->rm_xid);
+ hdr->rm_xid = hton32(reply_info->rm_xid);
} else {
rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
* only one vector.
* (which is true)
*/
- *ptr++ = rpc_msg->rm_xid;
+ hdr->rm_xid = rpc_msg->rm_xid;
}
- *ptr++ = hton32(GF_RDMA_VERSION);
- *ptr++ = hton32(peer->send_count);
- *ptr++ = hton32(GF_RDMA_ERROR);
- *ptr++ = hton32(err);
+ hdr->rm_vers = hton32(GF_RDMA_VERSION);
+ hdr->rm_credit = hton32(peer->send_count);
+ hdr->rm_type = hton32(GF_RDMA_ERROR);
+ hdr->rm_body.rm_error.rm_type = hton32(err);
if (err == ERR_VERS) {
- *ptr++ = hton32(GF_RDMA_VERSION);
- *ptr++ = hton32(GF_RDMA_VERSION);
+ hdr->rm_body.rm_error.rm_version.gf_rdma_vers_low
+ = hton32(GF_RDMA_VERSION);
+ hdr->rm_body.rm_error.rm_version.gf_rdma_vers_high
+ = hton32(GF_RDMA_VERSION);
}
- return (int)((unsigned long)ptr - (unsigned long)startp);
+ return sizeof (*hdr);
}
@@ -393,7 +1178,7 @@ __gf_rdma_send_error (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
int32_t ret = -1, len = 0;
len = __gf_rdma_encode_error (peer, reply_info, entry->rpchdr,
- (uint32_t *)post->buf, err);
+ (gf_rdma_header_t *)post->buf, err);
if (len == -1) {
gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
"encode error returned -1");
@@ -712,7 +1497,7 @@ out:
}
-inline void
+static inline void
__gf_rdma_deregister_mr (struct ibv_mr **mr, int count)
{
int i = 0;
@@ -833,28 +1618,6 @@ out:
}
-static int32_t
-gf_rdma_post_recv (struct ibv_srq *srq,
- gf_rdma_post_t *post)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = post->buf_size,
- .lkey = post->mr->lkey
- };
-
- struct ibv_recv_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- }, *bad_wr;
-
- gf_rdma_post_ref (post);
-
- return ibv_post_srq_recv (srq, &wr, &bad_wr);
-}
-
-
int
gf_rdma_post_unref (gf_rdma_post_t *post)
{
@@ -1115,7 +1878,7 @@ out:
}
-inline void
+static inline void
__gf_rdma_fill_reply_header (gf_rdma_header_t *header, struct iovec *rpchdr,
gf_rdma_reply_info_t *reply_info, int credits)
{
@@ -1716,7 +2479,7 @@ __gf_rdma_ioq_churn_entry (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry)
if (quota > 0) {
post = gf_rdma_get_post (&device->sendq);
if (post == NULL) {
- post = gf_rdma_new_post (device,
+ post = gf_rdma_new_post (peer->trans, device,
(options->send_size + 2048),
GF_RDMA_SEND_POST);
}
@@ -1993,189 +2756,6 @@ out:
return ret;
}
-#if 0
-static int
-gf_rdma_receive (rpc_transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- gf_rdma_private_t *priv = this->private;
- /* TODO: return error if !priv->connected, check with locks */
- /* TODO: boundry checks for data_ptr/offset */
- char *copy_from = NULL;
- gf_rdma_header_t *header = NULL;
- uint32_t size1, size2, data_len = 0;
- char *hdr = NULL;
- struct iobuf *iobuf = NULL;
- int32_t ret = 0;
-
- pthread_mutex_lock (&priv->recv_mutex);
- {
-/*
- while (!priv->data_ptr)
- pthread_cond_wait (&priv->recv_cond, &priv->recv_mutex);
-*/
-
- copy_from = priv->data_ptr + priv->data_offset;
-
- priv->data_ptr = NULL;
- data_len = priv->data_len;
- pthread_cond_broadcast (&priv->recv_cond);
- }
- pthread_mutex_unlock (&priv->recv_mutex);
-
- header = (gf_rdma_header_t *)copy_from;
- if (strcmp (header->colonO, ":O")) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "%s: corrupt header received", this->name);
- ret = -1;
- goto err;
- }
-
- size1 = ntoh32 (header->size1);
- size2 = ntoh32 (header->size2);
-
- if (data_len != (size1 + size2 + sizeof (*header))) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "%s: sizeof data read from transport is not equal "
- "to the size specified in the header",
- this->name);
- ret = -1;
- goto err;
- }
-
- copy_from += sizeof (*header);
-
- if (size1) {
- hdr = GF_CALLOC (1, size1, gf_common_mt_char);
- if (!hdr) {
- gf_log (this->name, GF_LOG_ERROR,
- "unable to allocate header for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (hdr, copy_from, size1);
- copy_from += size1;
- *hdr_p = hdr;
- }
- *hdrlen_p = size1;
-
- if (size2) {
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size2);
- if (!iobuf) {
- gf_log (this->name, GF_LOG_ERROR,
- "unable to allocate IO buffer for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (iobuf->ptr, copy_from, size2);
- *iobuf_p = iobuf;
- }
-
-err:
- return ret;
-}
-#endif
-
-
-static void
-gf_rdma_destroy_cq (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- if (device->recv_cq)
- ibv_destroy_cq (device->recv_cq);
- device->recv_cq = NULL;
-
- if (device->send_cq)
- ibv_destroy_cq (device->send_cq);
- device->send_cq = NULL;
-
- return;
-}
-
-
-static int32_t
-gf_rdma_create_cq (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
- uint64_t send_cqe = 0;
- int32_t ret = 0;
- struct ibv_device_attr device_attr = {{0}, };
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- device->recv_cq = ibv_create_cq (priv->device->context,
- options->recv_count * 2,
- device,
- device->recv_chan,
- 0);
- if (!device->recv_cq) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: creation of CQ for device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- } else if (ibv_req_notify_cq (device->recv_cq, 0)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on recv CQ of device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- }
-
- do {
- ret = ibv_query_device (priv->device->context, &device_attr);
- if (ret != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_query_device on %s returned %d (%s)",
- this->name, priv->device->device_name, ret,
- (ret > 0) ? strerror (ret) : "");
- ret = -1;
- goto out;
- }
-
- send_cqe = options->send_count * 128;
- send_cqe = (send_cqe > device_attr.max_cqe)
- ? device_attr.max_cqe : send_cqe;
-
- /* TODO: make send_cq size dynamically adaptive */
- device->send_cq = ibv_create_cq (priv->device->context,
- send_cqe, device,
- device->send_chan, 0);
- if (!device->send_cq) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: creation of send_cq for device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- }
-
- if (ibv_req_notify_cq (device->send_cq, 0)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on send_cq for device %s"
- " failed", this->name, device->device_name);
- ret = -1;
- goto out;
- }
- } while (0);
-
-out:
- if (ret != 0)
- gf_rdma_destroy_cq (this);
-
- return ret;
-}
-
static int
gf_rdma_register_peer (gf_rdma_device_t *device, int32_t qp_num,
@@ -2274,25 +2854,6 @@ __gf_rdma_lookup_peer (gf_rdma_device_t *device, int32_t qp_num)
return peer;
}
-/*
- static gf_rdma_peer_t *
- gf_rdma_lookup_peer (gf_rdma_device_t *device,
- int32_t qp_num)
- {
- gf_rdma_qpreg_t *qpreg = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- qpreg = &device->qpreg;
- pthread_mutex_lock (&qpreg->lock);
- {
- peer = __gf_rdma_lookup_peer (device, qp_num);
- }
- pthread_mutex_unlock (&qpreg->lock);
-
- return peer;
- }
-*/
-
static void
__gf_rdma_destroy_qp (rpc_transport_t *this)
@@ -2302,7 +2863,7 @@ __gf_rdma_destroy_qp (rpc_transport_t *this)
priv = this->private;
if (priv->peer.qp) {
gf_rdma_unregister_peer (priv->device, priv->peer.qp->qp_num);
- ibv_destroy_qp (priv->peer.qp);
+ rdma_destroy_qp (priv->peer.cm_id);
}
priv->peer.qp = NULL;
@@ -2313,18 +2874,36 @@ __gf_rdma_destroy_qp (rpc_transport_t *this)
static int32_t
gf_rdma_create_qp (rpc_transport_t *this)
{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
- int32_t ret = 0;
- gf_rdma_peer_t *peer = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_device_t *device = NULL;
+ int32_t ret = 0;
+ gf_rdma_peer_t *peer = NULL;
+ char *device_name = NULL;
priv = this->private;
- options = &priv->options;
- device = priv->device;
peer = &priv->peer;
+ device_name = (char *)ibv_get_device_name (peer->cm_id->verbs->device);
+ if (device_name == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "cannot get device_name");
+ goto out;
+ }
+
+ device = gf_rdma_get_device (this, peer->cm_id->verbs,
+ device_name);
+ if (device == NULL) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_WARNING, "cannot get device for "
+ "device %s", device_name);
+ goto out;
+ }
+
+ if (priv->device == NULL) {
+ priv->device = device;
+ }
+
struct ibv_qp_init_attr init_attr = {
.send_cq = device->send_cq,
.recv_cq = device->recv_cq,
@@ -2338,39 +2917,16 @@ gf_rdma_create_qp (rpc_transport_t *this)
.qp_type = IBV_QPT_RC
};
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_INIT,
- .pkey_index = 0,
- .port_num = options->port,
- .qp_access_flags
- = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE
- };
-
- peer->qp = ibv_create_qp (device->pd, &init_attr);
- if (!peer->qp) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "%s: could not create QP",
- this->name);
- ret = -1;
- goto out;
- } else if (ibv_modify_qp (peer->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_ACCESS_FLAGS)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "%s: failed to modify QP to INIT state",
- this->name);
+ ret = rdma_create_qp(peer->cm_id, device->pd, &init_attr);
+ if (ret != 0) {
+ gf_log (peer->trans->name, GF_LOG_CRITICAL,
+ "%s: could not create QP (%s)", this->name,
+ strerror (errno));
ret = -1;
goto out;
}
- peer->local_lid = gf_rdma_get_local_lid (device->context,
- options->port);
- peer->local_qpn = peer->qp->qp_num;
- peer->local_psn = lrand48 () & 0xffffff;
+ peer->qp = peer->cm_id->qp;
ret = gf_rdma_register_peer (device, peer->qp->qp_num, peer);
@@ -2382,300 +2938,52 @@ out:
}
-static void
-gf_rdma_destroy_posts (rpc_transport_t *this)
-{
-
-}
-
-
-static int32_t
-__gf_rdma_create_posts (rpc_transport_t *this, int32_t count, int32_t size,
- gf_rdma_queue_t *q, gf_rdma_post_type_t type)
-{
- int32_t i = 0;
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- for (i=0 ; i<count ; i++) {
- gf_rdma_post_t *post = NULL;
-
- post = gf_rdma_new_post (device, size + 2048, type);
- if (!post) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: post creation failed",
- this->name);
- ret = -1;
- break;
- }
-
- gf_rdma_put_post (q, post);
- }
- return ret;
-}
-
-
static int32_t
-gf_rdma_create_posts (rpc_transport_t *this)
+__gf_rdma_teardown (rpc_transport_t *this)
{
- int32_t i = 0, ret = 0;
- gf_rdma_post_t *post = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
+ gf_rdma_private_t *priv = NULL;
+ gf_rdma_peer_t *peer = NULL;
priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- ret = __gf_rdma_create_posts (this, options->send_count,
- options->send_size,
- &device->sendq, GF_RDMA_SEND_POST);
- if (!ret)
- ret = __gf_rdma_create_posts (this, options->recv_count,
- options->recv_size,
- &device->recvq,
- GF_RDMA_RECV_POST);
+ peer = &priv->peer;
- if (!ret) {
- for (i=0 ; i<options->recv_count ; i++) {
- post = gf_rdma_get_post (&device->recvq);
- if (gf_rdma_post_recv (device->srq, post) != 0) {
- ret = -1;
- break;
- }
- }
+ if (peer->cm_id->qp != NULL) {
+ __gf_rdma_destroy_qp (this);
}
- if (ret)
- gf_rdma_destroy_posts (this);
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_connect_qp (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = this->private;
- gf_rdma_options_t *options = &priv->options;
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_RTR,
- .path_mtu = options->mtu,
- .dest_qp_num = priv->peer.remote_qpn,
- .rq_psn = priv->peer.remote_psn,
- .max_dest_rd_atomic = 1,
- .min_rnr_timer = 12,
- .qp_access_flags
- = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE,
- .ah_attr = {
- .is_global = 0,
- .dlid = priv->peer.remote_lid,
- .sl = 0,
- .src_path_bits = 0,
- .port_num = options->port
- }
- };
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_AV |
- IBV_QP_PATH_MTU |
- IBV_QP_DEST_QPN |
- IBV_QP_RQ_PSN |
- IBV_QP_MAX_DEST_RD_ATOMIC |
- IBV_QP_MIN_RNR_TIMER)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTR\n");
- return -1;
+ if (!list_empty (&priv->peer.ioq)) {
+ __gf_rdma_ioq_flush (peer);
}
- attr.qp_state = IBV_QPS_RTS;
- attr.timeout = options->attr_timeout;
- attr.retry_cnt = options->attr_retry_cnt;
- attr.rnr_retry = options->attr_rnr_retry;
- attr.sq_psn = priv->peer.local_psn;
- attr.max_rd_atomic = 1;
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY |
- IBV_QP_SQ_PSN |
- IBV_QP_MAX_QP_RD_ATOMIC)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTS\n");
- return -1;
+ if (peer->cm_id != NULL) {
+ rdma_destroy_id (peer->cm_id);
+ peer->cm_id = NULL;
}
+ /* TODO: decrement cq size */
return 0;
}
+
static int32_t
-__gf_rdma_teardown (rpc_transport_t *this)
+gf_rdma_teardown (rpc_transport_t *this)
{
+ int32_t ret = 0;
gf_rdma_private_t *priv = NULL;
- priv = this->private;
- __gf_rdma_destroy_qp (this);
-
- if (!list_empty (&priv->peer.ioq)) {
- __gf_rdma_ioq_flush (&priv->peer);
+ if (this == NULL) {
+ goto out;
}
- /* TODO: decrement cq size */
- return 0;
-}
-
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
-
-static int
-__tcp_rwv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- int write)
-{
- gf_rdma_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = NULL;
- int opcount = 0;
- int moved = 0;
-
priv = this->private;
- sock = priv->sock;
- opvector = vector;
- opcount = count;
- while (opcount)
+ pthread_mutex_lock (&priv->write_mutex);
{
- if (write)
- {
- ret = writev (sock, opvector, opcount);
-
- if (ret == 0 || (ret == -1 && errno == EAGAIN))
- {
- /* done for now */
- break;
- }
- }
- else
- {
- ret = readv (sock, opvector, opcount);
-
- if (ret == -1 && errno == EAGAIN)
- {
- /* done for now */
- break;
- }
- }
-
- if (ret == 0)
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
-
- if (ret == -1)
- {
- if (errno == EINTR)
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- if (write && !priv->connected &&
- (errno == ECONNREFUSED))
- gf_log (this->name, GF_LOG_ERROR,
- "possible mismatch of 'rpc-transport-type'"
- " in protocol server and client. "
- "check volume file");
- opcount = -1;
- break;
- }
-
- moved = 0;
-
- while (moved < ret)
- {
- if ((ret - moved) >= opvector[0].iov_len)
- {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- }
- else
- {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len)
- {
- opvector++;
- opcount--;
- }
- }
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
-
- return opcount;
-}
-
-
-static int
-__tcp_readv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __tcp_rwv (this, vector, count,
- pending_vector, pending_count, 0);
-
- return ret;
-}
-
-
-static int
-__tcp_writev (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
-
- ret = __tcp_rwv (this, vector, count, pending_vector,
- pending_count, 1);
-
- if (ret > 0) {
- /* TODO: Avoid multiple calls when socket is already
- registered for POLLOUT */
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock, priv->idx, -1, 1);
- } else if (ret == 0) {
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock,
- priv->idx, -1, 0);
+ ret = __gf_rdma_teardown (this);
}
+ pthread_mutex_unlock (&priv->write_mutex);
+out:
return ret;
}
@@ -2770,10 +3078,11 @@ inline int32_t
gf_rdma_decode_error_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
size_t bytes_in_post)
{
- gf_rdma_header_t *header = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int32_t ret = -1;
+ gf_rdma_header_t *header = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int32_t ret = -1;
+ struct rpc_msg rpc_msg = {0, };
header = (gf_rdma_header_t *)post->buf;
header->rm_body.rm_error.rm_type
@@ -2785,6 +3094,10 @@ gf_rdma_decode_error_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_high);
}
+ rpc_msg.rm_xid = header->rm_xid;
+ rpc_msg.rm_direction = REPLY;
+ rpc_msg.rm_reply.rp_stat = MSG_DENIED;
+
iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, bytes_in_post);
if (iobuf == NULL) {
ret = -1;
@@ -2799,15 +3112,15 @@ gf_rdma_decode_error_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
iobref_add (iobref, iobuf);
iobuf_unref (iobuf);
- /*
- * FIXME: construct an appropriate rpc-msg here, what is being sent
- * to rpc is not correct.
- */
- post->ctx.vector[0].iov_base = iobuf_ptr (iobuf);
- post->ctx.vector[0].iov_len = bytes_in_post;
-
- memcpy (post->ctx.vector[0].iov_base, (char *)post->buf,
- post->ctx.vector[0].iov_len);
+
+ ret = rpc_reply_to_xdr (&rpc_msg, iobuf_ptr (iobuf),
+ iobuf_pagesize (iobuf), &post->ctx.vector[0]);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "Failed to create RPC reply");
+ goto out;
+ }
+
post->ctx.count = 1;
iobuf = NULL;
@@ -2925,9 +3238,7 @@ out:
*readch = NULL;
}
- if (write_ary != NULL) {
- GF_FREE (write_ary);
- }
+ GF_FREE (write_ary);
}
return ret;
@@ -2977,8 +3288,8 @@ gf_rdma_decode_header (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
case GF_RDMA_ERROR:
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "recieved a msg of type RDMA_ERROR");
- /* ret = gf_rdma_decode_error_msg (peer, post, bytes_in_post); */
+ "received a msg of type RDMA_ERROR");
+ ret = gf_rdma_decode_error_msg (peer, post, bytes_in_post);
break;
default:
@@ -3338,6 +3649,7 @@ gf_rdma_process_recv (gf_rdma_peer_t *peer, struct ibv_wc *wc)
uint32_t *ptr = NULL;
enum msg_type msg_type = 0;
gf_rdma_header_t *header = NULL;
+ gf_rdma_private_t *priv = NULL;
post = (gf_rdma_post_t *) (long) wc->wr_id;
if (post == NULL) {
@@ -3355,6 +3667,26 @@ gf_rdma_process_recv (gf_rdma_peer_t *peer, struct ibv_wc *wc)
header = (gf_rdma_header_t *)post->buf;
+ priv = peer->trans->private;
+
+ pthread_mutex_lock (&priv->write_mutex);
+ {
+ if (!priv->peer.quota_set) {
+ priv->peer.quota_set = 1;
+
+ /* Initially peer.quota is set to 1 as per RFC 5666. We
+ * have to account for the quota used while sending
+ * first msg (which may or may not be returned to pool
+ * at this point) while deriving peer.quota from
+ * header->rm_credit. Hence the arithmatic below,
+ * instead of directly setting it to header->rm_credit.
+ */
+ priv->peer.quota = header->rm_credit
+ - ( 1 - priv->peer.quota);
+ }
+ }
+ pthread_mutex_unlock (&priv->write_mutex);
+
switch (header->rm_type) {
case GF_RDMA_MSG:
ptr = (uint32_t *)post->ctx.vector[0].iov_base;
@@ -3370,24 +3702,32 @@ gf_rdma_process_recv (gf_rdma_peer_t *peer, struct ibv_wc *wc)
break;
case GF_RDMA_ERROR:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "an error has happened while transmission of msg, "
- "disconnecting the transport");
- rpc_transport_disconnect (peer->trans);
- goto out;
-
-/* ret = gf_rdma_pollin_notify (peer, post);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "pollin notification failed");
- }
- goto out;
-*/
+ if (header->rm_body.rm_error.rm_type == ERR_CHUNK) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
+ "peer (%s), couldn't encode or decode the msg "
+ "properly or write chunks were not provided "
+ "for replies that were bigger than "
+ "RDMA_INLINE_THRESHOLD (%d)",
+ peer->trans->peerinfo.identifier,
+ GLUSTERFS_RDMA_INLINE_THRESHOLD);
+ ret = gf_rdma_pollin_notify (peer, post);
+ if (ret == -1) {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
+ "pollin notification failed");
+ }
+ goto out;
+ } else {
+ gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
+ "an error has happened while transmission of "
+ "msg, disconnecting the transport");
+ ret = -1;
+ goto out;
+ }
default:
gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
"invalid rdma msg-type (%d)", header->rm_type);
- break;
+ goto out;
}
if (msg_type == CALL) {
@@ -3414,6 +3754,42 @@ out:
return;
}
+void *
+gf_rdma_async_event_thread (void *context)
+{
+ struct ibv_async_event event;
+ int ret;
+
+ while (1) {
+ do {
+ ret = ibv_get_async_event((struct ibv_context *)context,
+ &event);
+
+