diff options
author | Kaleb S. KEITHLEY <kkeithle@redhat.com> | 2017-01-20 11:11:46 -0500 |
---|---|---|
committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2018-05-09 17:41:37 +0000 |
commit | 641355a0fe5ae18d983ce91b46ecafd50d6eb5c1 (patch) | |
tree | 183fd289b55027e991e8c1a6060d5dad5f5d984c | |
parent | 98d3de5ce7b517cb2dfdd58b4111c26e6cefaf00 (diff) |
core: remove experimental xlators and associated tests
experimental xlators removed from 4.1
Signed-off-by: ShyamsundarR <srangana@redhat.com>
Change-Id: I34419ce22ca09b7626b8f9382c377a614fd9fed8
Updates: bz#1575386
57 files changed, 2 insertions, 6056 deletions
diff --git a/configure.ac b/configure.ac index b818d3fac0d..5c598717337 100644 --- a/configure.ac +++ b/configure.ac @@ -124,8 +124,6 @@ AC_CONFIG_FILES([Makefile xlators/features/arbiter/src/Makefile xlators/features/thin-arbiter/Makefile xlators/features/thin-arbiter/src/Makefile - xlators/experimental/fdl/Makefile - xlators/experimental/fdl/src/Makefile xlators/features/changelog/Makefile xlators/features/changelog/src/Makefile xlators/features/changelog/lib/Makefile @@ -199,23 +197,6 @@ AC_CONFIG_FILES([Makefile xlators/mgmt/Makefile xlators/mgmt/glusterd/Makefile xlators/mgmt/glusterd/src/Makefile - xlators/experimental/Makefile - xlators/experimental/jbr-client/Makefile - xlators/experimental/jbr-client/src/Makefile - xlators/experimental/jbr-server/Makefile - xlators/experimental/jbr-server/src/Makefile - xlators/experimental/dht2/Makefile - xlators/experimental/dht2/dht2-client/Makefile - xlators/experimental/dht2/dht2-client/src/Makefile - xlators/experimental/dht2/dht2-server/Makefile - xlators/experimental/dht2/dht2-server/src/Makefile - xlators/experimental/posix2/Makefile - xlators/experimental/posix2/ds/Makefile - xlators/experimental/posix2/ds/src/Makefile - xlators/experimental/posix2/mds/Makefile - xlators/experimental/posix2/mds/src/Makefile - xlators/experimental/posix2/common/Makefile - xlators/experimental/posix2/common/src/Makefile cli/Makefile cli/src/Makefile doc/Makefile @@ -1498,18 +1479,6 @@ AC_SUBST(UNITTEST_LDFLAGS) AC_SUBST(CFLAGS) # end enable debug section -# experimental section -AC_ARG_ENABLE([experimental], - AC_HELP_STRING([--disable-experimental], - [Disable building experimental xlators])) - -BUILD_EXPERIMENTAL="yes" -if test "x$enable_experimental" = "xno"; then - BUILD_EXPERIMENTAL="no" -fi -AM_CONDITIONAL([ENABLE_EXPERIMENTAL], [test x$BUILD_EXPERIMENTAL = xyes]) -#end experimental section - # EC dynamic code generation section EC_DYNAMIC_SUPPORT="none" @@ -1693,7 +1662,6 @@ echo "POSIX ACLs : $BUILD_POSIX_ACLS" echo "Data Classification : $BUILD_GFDB" echo "SELinux features : $USE_SELINUX" echo "firewalld-config : $BUILD_FIREWALLD" -echo "Experimental xlators : $BUILD_EXPERIMENTAL" echo "Events : $BUILD_EVENTS" echo "EC dynamic support : $EC_DYNAMIC_SUPPORT" echo "Use memory pools : $USE_MEMPOOL" diff --git a/glusterfs.spec.in b/glusterfs.spec.in index c03577d7695..57f8da755e4 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -1108,10 +1108,6 @@ exit 0 %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/nl-cache.so %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system/posix-acl.so -%if 0%{?!_without_server:1} -%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental - %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/fdl.so -%endif %dir %attr(0775,gluster,gluster) %{_rundir}/gluster %if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1} %{_tmpfilesdir}/gluster.conf @@ -1163,8 +1159,6 @@ exit 0 %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/*.so %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so -%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental - %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/dht2c.so %files extra-xlators %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator @@ -1320,7 +1314,6 @@ exit 0 %{_mandir}/man8/gluster-setgfid2path.8* # xlators - %{_libdir}/libposix2common.so %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so @@ -1351,12 +1344,6 @@ exit 0 %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so %dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/decompounder.so -%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental - %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/jbrc.so - %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/jbr.so - %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/dht2s.so - %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/posix2-ds.so - %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/posix2-mds.so # snap_scheduler %{_sbindir}/snap_scheduler.py @@ -1443,8 +1430,6 @@ exit 0 %{_prefix}/lib/firewalld/services/glusterfs.xml %endif -%{_sbindir}/gf_logdump -%{_sbindir}/gf_recon # end of server files %endif diff --git a/tests/basic/jbr/jbr-volgen.t b/tests/basic/jbr/jbr-volgen.t deleted file mode 100644 index f368710c158..00000000000 --- a/tests/basic/jbr/jbr-volgen.t +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc - -volfiles=${GLUSTERD_WORKDIR}/vols/${V0}/ -check_brick_volfiles () { - for vf in ${volfiles}${V0}.$(hostname).*.vol; do - grep -qs experimental/jbr $vf || return - # At least for now, nothing else would put a client translator - # in a brick volfile. - grep -qs protocol/client $vf || return - done - echo "OK" -} - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} -TEST $CLI volume set $V0 cluster.jbr on - -# Check that the client volfile got modified properly. -TEST grep -qs experimental/jbrc ${volfiles}${V0}.tcp-fuse.vol - -# Check that the brick volfiles got modified as well. -EXPECT "OK" check_brick_volfiles - -# Put things back and make sure the "undo" worked. -TEST $CLI volume set $V0 cluster.jbr off -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 -echo hello > $M0/probe -EXPECT hello cat ${B0}/${V0}1/probe -EXPECT hello cat ${B0}/${V0}2/probe - -cleanup -#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758 -#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758 diff --git a/tests/basic/jbr/jbr.t b/tests/basic/jbr/jbr.t deleted file mode 100755 index 605344b5a7e..00000000000 --- a/tests/basic/jbr/jbr.t +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. $(dirname $0)/../../cluster.rc -. $(dirname $0)/../../snapshot.rc -. $(dirname $0)/../../fdl.rc - -cleanup; - -TEST verify_lvm_version; -#Create cluster with 3 nodes -TEST launch_cluster 3; -TEST setup_lvm 3 - -TEST $CLI_1 peer probe $H2; -TEST $CLI_1 peer probe $H3; -EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count; - -TEST $CLI_1 volume create $V0 replica 3 $H1:$L1 $H2:$L2 $H3:$L3 -TEST $CLI_1 volume set $V0 cluster.jbr on -TEST $CLI_1 volume set $V0 cluster.jbr.quorum-percent 100 -TEST $CLI_1 volume set $V0 features.fdl on -#TEST $CLI_1 volume set $V0 diagnostics.brick-log-level DEBUG -TEST $CLI_1 volume start $V0 - -TEST glusterfs --volfile-id=$V0 --volfile-server=$H1 --entry-timeout=0 $M0; - -EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" jbrc_child_up_status $V0 0 - -echo "file" > $M0/file1 -TEST stat $L1/file1 -TEST stat $L2/file1 -TEST stat $L3/file1 - -cleanup; -#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758 -#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758 diff --git a/tests/features/fdl-overflow.t b/tests/features/fdl-overflow.t deleted file mode 100644 index 34b941d2f2a..00000000000 --- a/tests/features/fdl-overflow.t +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc -. $(dirname $0)/../fdl.rc - -_check_sizes () { - local n=0 - local sz - local total_sz=0 - - # We don't care about the sizes of the meta files. That would be - # embedding too much of the implementation into the test. - n=$(ls ${log_base}/${log_id}-meta-*.jnl | wc -l) - [ $n = 2 ] || return 1 - - # We *do* care about the sizes of the data files, which should exactly - # reflect the amount of data written via dd. - n=0 - while read sz name; do - G_LOG "found journal ${name} size ${sz}MB" - n=$((n+1)) - total_sz=$((total_sz+sz)) - done < <(du -sm ${log_base}/${log_id}-data-*.jnl) - [ $n = 2 ] || return 1 - # On our CentOS and NetBSD regression-test systems, but not on my Fedora - # development system, each file ends up being slightly larger than its - # data size because of metadata, and 'du' rounds that up to a full extra - # megabyte. We'll allow either result, because what we're really - # looking for is a complete failure to roll over from one file to - # another at the appropriate size. - [ $total_sz = 20 -o $total_sz = $((n+20)) ] || return 1 - - return 0 -} - -check_sizes () { - set -x - _check_sizes - ret=$? - set +x - return ret -} - -if [ x"$OSTYPE" = x"NetBSD" ]; then - CREAT_OFLAG="creat," -else - CREAT_OFLAG="" -fi - -TEST rm -f ${log_base}/${log_id}-*.log -TEST glusterd -TEST pidof glusterd - -# Get a simple volume set up and mounted with FDL active. -TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 -TEST $CLI volume set $V0 changelog.changelog off -TEST $CLI volume set $V0 features.fdl on -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 - -# Generate some I/O and unmount/stop so we can see log sizes. -TEST dd if=/dev/zero of=$M0/twentyMB bs=1048576 count=20 \ - oflag=${CREAT_OFLAG}sync -TEST umount $M0 -TEST $CLI volume stop $V0 - -TEST _check_sizes - -cleanup -#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758 -#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758 diff --git a/tests/features/fdl.t b/tests/features/fdl.t deleted file mode 100644 index 5a3c13fc850..00000000000 --- a/tests/features/fdl.t +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc -. $(dirname $0)/../fdl.rc - -if [ x"$OSTYPE" = x"NetBSD" ]; then - CREAT_OFLAG="creat," -else - CREAT_OFLAG="" -fi - -TEST rm -f $FDL_META_FILE $FDL_DATA_FILE -TEST glusterd -TEST pidof glusterd - -# Get a simple volume set up and mounted with FDL active. -TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 -TEST $CLI volume set $V0 changelog.changelog off -TEST $CLI volume set $V0 features.fdl on -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 - -# Generate some I/O and unmount. -TEST mkdir -p $M0/abc/def -TEST dd if=/dev/zero of=$M0/abc/def/ghi bs=128 count=2 \ - oflag=${CREAT_OFLAG}sync -TEST chmod 314 $M0/abc/def/ghi -TEST rm -rf $M0/abc -TEST umount $M0 - -# Check that gf_logdump works, and shows the ops we just issued. There will be -# more SETATTR ops than the one corresponding to our chmod, because some are -# issued internally. We have to guess a bit about where the log will be. -TEST check_logfile GF_FOP_MKDIR 2 -TEST check_logfile GF_FOP_CREATE 1 -TEST check_logfile GF_FOP_WRITE 2 -TEST check_logfile GF_FOP_SETATTR 1 -TEST check_logfile GF_FOP_UNLINK 1 -TEST check_logfile GF_FOP_RMDIR 2 - -cleanup -#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758 -#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758 diff --git a/tests/features/recon.t b/tests/features/recon.t deleted file mode 100644 index 82ef6fd755d..00000000000 --- a/tests/features/recon.t +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../traps.rc -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc -. $(dirname $0)/../fdl.rc - -tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX) -push_trapfunc "rm -rf $tmpdir" - -write_file () { - echo "peekaboo" > $1 -} - -TEST rm -f $FDL_META_FILE $FDL_DATA_FILE -TEST glusterd -TEST pidof glusterd - -# Get a simple volume set up and mounted with FDL active. -TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 -TEST $CLI volume set $V0 features.fdl on -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 - -# Generate some I/O and then copy off the journal files for later. -TEST mkdir -p $M0/abc/def -TEST write_file $M0/abc/def/ghi -#EST chmod 314 $M0/abc/def/ghi -cp ${FDL_META_FILE} ${FDL_DATA_FILE} ${tmpdir} - -# Get back to an empty state and unmount. -TEST rm -rf $M0/abc -TEST umount $M0 - -# Make sure we really are in an empty state. Otherwise the tests below could -# pass just because we never cleaned up in the first place. -TEST [ ! -d ${B0}/${V0}-0/abc ] - -# Create a stub volfile. -vol_file=${GLUSTERD_WORKDIR}/vols/${V0}/${V0}.${H0}.${log_id}.vol -vol_id_line=$(grep volume-id ${vol_file}) -cat > ${tmpdir}/recon.vol << EOF -volume recon-posix - type storage/posix - option directory ${B0}/${V0}-0 -${vol_id_line} -end-volume -EOF - -TEST gf_recon ${tmpdir}/recon.vol ${tmpdir}/$(basename ${FDL_META_FILE}) \ - ${tmpdir}/$(basename ${FDL_DATA_FILE}) - -TEST [ -d ${B0}/${V0}-0/abc/def ] -EXPECT "peekaboo" cat ${B0}/${V0}-0/abc/def/ghi -# TBD: test permissions, xattrs - -cleanup -#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758 -#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758 diff --git a/xlators/Makefile.am b/xlators/Makefile.am index 876a31f8e81..29549db724e 100644 --- a/xlators/Makefile.am +++ b/xlators/Makefile.am @@ -1,16 +1,12 @@ -if ENABLE_EXPERIMENTAL - EXPERIMENTAL = experimental -endif - if BUILD_GNFS GNFS_DIR = nfs endif DIST_SUBDIRS = cluster storage protocol performance debug features encryption \ - mount nfs mgmt system playground meta experimental + mount nfs mgmt system playground meta SUBDIRS = cluster storage protocol performance debug features encryption \ - mount ${GNFS_DIR} mgmt system playground meta $(EXPERIMENTAL) + mount ${GNFS_DIR} mgmt system playground meta EXTRA_DIST = xlator.sym diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am deleted file mode 100644 index a530845c4c0..00000000000 --- a/xlators/experimental/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = jbr-client jbr-server fdl dht2 posix2 - -CLEANFILES = diff --git a/xlators/experimental/README.md b/xlators/experimental/README.md deleted file mode 100644 index b00f24e114b..00000000000 --- a/xlators/experimental/README.md +++ /dev/null @@ -1,107 +0,0 @@ -# Purpose of this directory - -This directory is created to host experimental gluster translators. A new -translator that is *experimental* in nature, would need to create its own -subdirectory under this directory, to host/publish its work. - -Example: - The first commit should include the following changes - 1. xlators/experimental/Makefile.am - NOTE: Add foobar to the list of SUBDIRS here - 2. xlators/experimental/foobar - 3. xlators/experimental/foobar/Makefle.am - NOTE: Can be empty initially in the first commit - 4. configure.ac - NOTE: Include your experimental Makefile under AC_CONFIG_FILES - 5. xlators/experimental/foobar/README.md - NOTE: The readme should cover details as required for the translator to be - accepted as experimental, primarily including a link to the specification - under the gluster-specs repository [1]. Later the readme should suffice - as an entry point for developers and users alike, who wish to experiment - with the xlator under development - 6. xlators/experimental/foobar/TODO.md - NOTE: This is a list of TODO's identified during the development process - that needs addressing over time. These include exceptions granted during - the review process, for things not addressed when commits are merged into - the repository - -# Why is it provided - -Quite often translator development that happens out of tree, does not get -enough eyeballs early in its development phase, has not undergone CI -(regression/continuous integration testing), and at times is not well integrated -with the rest of gluster stack. - -Also, when such out of tree translators are submitted for acceptance, it is a -bulk commit that makes review difficult and inefficient. Such submissions also -have to be merged forward, and depending on the time spent in developing the -translator the master branch could have moved far ahead, making this a painful -activity. - -Experimental is born out of such needs, to provide xlator developers, - - Early access to CI - - Ability to adapt to ongoing changes in other parts of gluster - - More eye balls on the code and design aspects of the translator - - TBD: What else? - -and for maintainers, - - Ability to look at smaller change sets in the review process - - Ability to verify/check implementation against the specification provided - -# General rules - -1. If a new translator is added under here it should, at the very least, pass -compilation. - -2. All translators under the experimental directory are shipped as a part of -gluster-experimental RPMs. -TBD: Spec file and other artifacts for the gluster-experimental RPM needs to be -fleshed out. - -3. Experimental translators can leverage the CI framework as needed. Tests need -to be hosted under xlators/experimental/tests initially, and later moved to the -appropriate tests/ directory as the xlator matures. It is encouraged to provide -tests for each commit or series of commits, so that code and tests can be -inspected together. - -4. If any experimental translator breaks CI, it is quarantined till demonstrable -proof towards the contrary is provided. This is applicable as tests are moved -out of experimental tests directory to the CI framework directory, as otherwise -experimental tests are not a part of regular CI regression runs. - -5. An experimental translator need not function at all, as a result commits can -be merged pretty much at will as long as other rules as stated are not violated. - -6. Experimental submissions will be assigned a existing maintainer, to aid -merging commits and ensure aspects of gluster code submissions are respected. -When an experimental xlator is proposed and the first commit posted -a mail to gluster-devel@gluster.org requesting attention, will assign the -maintainer buddy for the submission. -NOTE: As we scale, this may change. - -6. More? - -# Getting out of the experimental jail - -So you now think your xlator is ready to leave experimental and become part of -mainline! -- TBD: guidelines pending. - -# FAQs - -1. How do I submit/commit experimental framework changes outside of my -experimental xlator? - - Provide such framework changes as a separate commit - - Conditionally ensure these are built or activated only when the experimental - feature is activated, so as to prevent normal gluster workflow to function as - before - - TBD: guidelines and/or examples pending. - -2. Ask your question either on gluster-devel@gluster.org or as a change request -to this file in gluster gerrit [2] for an answer that will be assimilated into -this readme. - -# Links -[1] http://review.gluster.org/#/q/project:glusterfs-specs - -[2] http://review.gluster.org/#/q/project:glusterfs diff --git a/xlators/experimental/dht2/Makefile.am b/xlators/experimental/dht2/Makefile.am deleted file mode 100644 index 9d910a66056..00000000000 --- a/xlators/experimental/dht2/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = dht2-client dht2-server - -CLEANFILES = diff --git a/xlators/experimental/dht2/README.md b/xlators/experimental/dht2/README.md deleted file mode 100644 index 8f249a83673..00000000000 --- a/xlators/experimental/dht2/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# DHT2 Experimental README - -DHT2 is the new distribution scheme being developed for Gluster, that -aims to remove the subdirectory spread across all DHT subvolumes. - -As a result of this work, the Gluster backend file layouts and on disk -representation of directories and files are modified, thus making DHT2 -volumes incompatible to existing DHT based Gluster deployments. - -This document presents interested users with relevant data to play around -with DHT2 volumes and provide feedback towards the same. - -REMOVEME: Design details currently under review here, - - http://review.gluster.org/#/c/13395/ - -TODO: Add more information as relevant code is pulled in - -# Directory strucutre elaborated - -## dht2-server -This directory contains code for the server side DHT2 xlator. This xlator is -intended to run on the brick graph, and is responsible for FOP synchronization, -redirection, transactions, and journal replays. - -NOTE: The server side code also handles changes to volume/cluster map and -also any rebalance activities. - -## dht2-client -This directory contains code for the client side DHT2 xlator. This xlator is -intended to run on the client/access protocol/mount graph, and is responsible -for FOP routing to the right DHT2 subvolume. It uses a volume/cluster wide map -of the routing (layout), to achieve the same. - -## dht2-common -This directory contains code that is used in common across other parts of DHT2. -For example, FOP routing store/consult abstractions that are common across the -client and server side of DHT2. - -## Issue: How to build dht2-common? - 1. Build a shared object - - We cannot ship this as a part of both the client xlator RPM - 2. Build an archive - - Symbol clashes? when both the client and server xlators are loaded as a - part of the same graph - 3. Compile with other parts of the code that needs it - - Not a very different from (2) above - - This is what is chosen at present, and maybe would be revised later diff --git a/xlators/experimental/dht2/TODO.md b/xlators/experimental/dht2/TODO.md deleted file mode 100644 index 1e2c53c5b36..00000000000 --- a/xlators/experimental/dht2/TODO.md +++ /dev/null @@ -1,3 +0,0 @@ -# DHT2 TODO list - -<Items will be added as code is pulled into the repository> diff --git a/xlators/experimental/dht2/dht2-client/Makefile.am b/xlators/experimental/dht2/dht2-client/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/dht2/dht2-client/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/dht2/dht2-client/src/Makefile.am b/xlators/experimental/dht2/dht2-client/src/Makefile.am deleted file mode 100644 index a16b9df2f76..00000000000 --- a/xlators/experimental/dht2/dht2-client/src/Makefile.am +++ /dev/null @@ -1,21 +0,0 @@ -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental -xlator_LTLIBRARIES = dht2c.la - -dht2c_sources = dht2-client-main.c - -dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c - -dht2c_la_SOURCES = $(dht2c_sources) $(dht2common_sources) -dht2c_la_LDFLAGS = -module -avoid-version -dht2c_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -AM_CPPFLAGS = $(GF_CPPFLAGS) -AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/ -AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src -AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src -AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src -AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src - -CLEANFILES = diff --git a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c b/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c deleted file mode 100644 index bd1d446e2b5..00000000000 --- a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/* File: dht2-client-main.c - * This file contains the xlator loading functions, FOP entry points - * and options. - * The entire functionality including comments is TODO. - */ - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "statedump.h" - -int32_t -dht2_client_init (xlator_t *this) -{ - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "Missing children in volume graph, this (%s) is" - " not a leaf translator", this->name); - return -1; - } - - return 0; -} - -void -dht2_client_fini (xlator_t *this) -{ - return; -} - -class_methods_t class_methods = { - .init = dht2_client_init, - .fini = dht2_client_fini, -}; - -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; - -/* -struct xlator_dumpops dumpops = { -}; -*/ - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c b/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c deleted file mode 100644 index d959483b8a4..00000000000 --- a/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c +++ /dev/null @@ -1,19 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/* File: dht2-common-map.c - * This file contains helper routines to store, consult, the volume map - * for subvolume to GFID relations. - * The entire functionality including comments is TODO. - */ - -#include "glusterfs.h" -#include "logging.h" -#include "statedump.h" diff --git a/xlators/experimental/dht2/dht2-server/Makefile.am b/xlators/experimental/dht2/dht2-server/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/dht2/dht2-server/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/dht2/dht2-server/src/Makefile.am b/xlators/experimental/dht2/dht2-server/src/Makefile.am deleted file mode 100644 index 12d66d126f0..00000000000 --- a/xlators/experimental/dht2/dht2-server/src/Makefile.am +++ /dev/null @@ -1,23 +0,0 @@ -if WITH_SERVER -xlator_LTLIBRARIES = dht2s.la -endif -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental - -dht2s_sources = dht2-server-main.c - -dht2common_sources = $(top_srcdir)/xlators/experimental/dht2/dht2-common/src/dht2-common-map.c - -dht2s_la_SOURCES = $(dht2s_sources) $(dht2common_sources) -dht2s_la_LDFLAGS = -module -avoid-version -dht2s_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -AM_CPPFLAGS = $(GF_CPPFLAGS) -AM_CPPFLAGS += -I$(top_srcdir)/xlators/experimental/dht2/dht2-common/src/ -AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src -AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src -AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src -AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src - -CLEANFILES = diff --git a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c b/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c deleted file mode 100644 index 1f232cc3430..00000000000 --- a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/* File: dht2-server-main.c - * This file contains the xlator loading functions, FOP entry points - * and options. - * The entire functionality including comments is TODO. - */ - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "statedump.h" - -int32_t -dht2_server_init (xlator_t *this) -{ - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "Missing children in volume graph, this (%s) is" - " not a leaf translator", this->name); - return -1; - } - - return 0; -} - -void -dht2_server_fini (xlator_t *this) -{ - return; -} - -class_methods_t class_methods = { - .init = dht2_server_init, - .fini = dht2_server_fini, -}; - -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; - -/* -struct xlator_dumpops dumpops = { -}; -*/ - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/experimental/fdl/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/fdl/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am deleted file mode 100644 index da80ce28317..00000000000 --- a/xlators/experimental/fdl/src/Makefile.am +++ /dev/null @@ -1,48 +0,0 @@ -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental -if WITH_SERVER -xlator_LTLIBRARIES = fdl.la -endif - -noinst_HEADERS = fdl.h - -nodist_fdl_la_SOURCES = fdl.c -fdl_la_LDFLAGS = -module -avoid-version -fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -if WITH_SERVER -sbin_PROGRAMS = gf_logdump gf_recon -endif -gf_logdump_SOURCES = logdump.c -nodist_gf_logdump_SOURCES = libfdl.c -gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/api/src/libgfapi.la $(GFAPI_LIBS) $(UUID_LIBS) - -# Eventually recon(ciliation) code will move elsewhere, but for now it's -# easier to have it next to the similar logdump code. -gf_recon_SOURCES = recon.c -nodist_gf_recon_SOURCES = librecon.c -gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/api/src/libgfapi.la $(GFAPI_LIBS) $(UUID_LIBS) - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ - -I$(top_srcdir)/api/src -fPIC \ - -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ - -DDATADIR=\"$(localstatedir)\" - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py -EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c - -CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \ - $(nodist_gf_recon_SOURCES) - -fdl.c: fdl-tmpl.c gen_fdl.py - $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@ - -libfdl.c: dump-tmpl.c gen_dumper.py - $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@ - -librecon.c: recon-tmpl.c gen_recon.py - $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@ diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c deleted file mode 100644 index 32b0fef6af3..00000000000 --- a/xlators/experimental/fdl/src/dump-tmpl.c +++ /dev/null @@ -1,187 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#include <ctype.h> -#endif - -#include "glfs.h" -#include "iatt.h" -#include "xlator.h" -#include "fdl.h" - -/* - * Returns 0 if the string is ASCII printable * - * and -1 if it's not ASCII printable * - */ -int str_isprint (char *s) -{ - int ret = -1; - - if (!s) - goto out; - - while (s[0] != '\0') { - if (!isprint(s[0])) - goto out; - else - s++; - } - - ret = 0; -out: - return ret; -} - -#pragma fragment DICT - { - int key_len, data_len; - char *key_ptr; - char *key_val; - printf ("@ARGNAME@ = dict {\n"); - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - key_val = new_meta + sizeof(int); - new_meta += sizeof(int) + data_len; - if (str_isprint(key_val)) - printf (" %s = <%d bytes>\n", - key_ptr, data_len); - else - printf (" %s = %s <%d bytes>\n", - key_ptr, key_val, data_len); - } - printf ("}\n"); - } - -#pragma fragment DOUBLE - printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), - *((uint64_t *)new_meta)); - new_meta += sizeof(uint64_t); - -#pragma fragment GFID - printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - -#pragma fragment INTEGER - printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), - *((uint32_t *)new_meta)); - new_meta += sizeof(uint32_t); - -#pragma fragment LOC - printf ("@ARGNAME@ = loc {\n"); - printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - if (*(new_meta++)) { - printf (" name = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } - printf ("}\n"); - -#pragma fragment STRING - if (*(new_meta++)) { - printf ("@ARGNAME@ = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } - -#pragma fragment VECTOR - { - size_t len = *((size_t *)new_meta); - new_meta += sizeof(len); - printf ("@ARGNAME@ = <%zu bytes>\n", len); - new_data += len; - } - -#pragma fragment IATT - { - ia_prot_t *myprot = ((ia_prot_t *)new_meta); - printf ("@ARGNAME@ = iatt {\n"); - printf (" ia_prot = %c%c%c", - myprot->suid ? 'S' : '-', - myprot->sgid ? 'S' : '-', - myprot->sticky ? 'T' : '-'); - printf ("%c%c%c", - myprot->owner.read ? 'r' : '-', - myprot->owner.write ? 'w' : '-', - myprot->owner.exec ? 'x' : '-'); - printf ("%c%c%c", - myprot->group.read ? 'r' : '-', - myprot->group.write ? 'w' : '-', - myprot->group.exec ? 'x' : '-'); - printf ("%c%c%c\n", - myprot->other.read ? 'r' : '-', - myprot->other.write ? 'w' : '-', - myprot->other.exec ? 'x' : '-'); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - printf (" ia_uid = %u\n", myints[0]); - printf (" ia_gid = %u\n", myints[1]); - printf (" ia_atime = %u.%09u\n", myints[2], myints[3]); - printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]); - new_meta += sizeof(*myints) * 6; - } - -#pragma fragment FOP -void -fdl_dump_@NAME@ (char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - - /* TBD: word size/endianness */ -@FUNCTION_BODY@ - - *old_meta = new_meta; - *old_data = new_data; -} - -#pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - fdl_dump_@NAME@ (&new_meta, &new_data); - break; - -#pragma fragment EPILOG -int -fdl_dump (char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - static glfs_t *fs = NULL; - int recognized = 1; - event_header_t *eh; - - /* - * We don't really call anything else in GFAPI, but this is the most - * convenient way to satisfy all of the spurious dependencies on how it - * or glusterfsd initialize (e.g. setting up THIS). - */ - if (!fs) { - fs = glfs_new ("dummy"); - } - - eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); - - /* TBD: check event_type instead of assuming NEW_REQUEST */ - - switch (eh->fop_type) { -@SWITCH_BODY@ - - default: - printf ("unknown fop %u\n", eh->fop_type); - recognized = 0; - } - - *old_meta = new_meta; - *old_data = new_data; - return recognized; -} diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c deleted file mode 100644 index 145dad7964a..00000000000 --- a/xlators/experimental/fdl/src/fdl-tmpl.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <fcntl.h> -#include <unistd.h> -#include <sys/mman.h> -#include "call-stub.h" -#include "iatt.h" -#include "defaults.h" -#include "syscall.h" -#include "xlator.h" -#include "fdl.h" - -/* TBD: make tunable */ -#define META_FILE_SIZE (1 << 20) -#define DATA_FILE_SIZE (1 << 24) - -enum gf_fdl { - gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, - gf_fdl_mt_end -}; - -typedef struct { - char *type; - off_t size; - char *path; - int fd; - void * ptr; - off_t max_offset; -} log_obj_t; - -typedef struct { - struct list_head reqs; - pthread_mutex_t req_lock; - pthread_cond_t req_cond; - char *log_dir; - pthread_t worker; - gf_boolean_t should_stop; - gf_boolean_t change_term; - log_obj_t meta_log; - log_obj_t data_log; - int term; - int first_term; -} fdl_private_t; - -int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); - -void -fdl_enqueue (xlator_t *this, call_stub_t *stub) -{ - fdl_private_t *priv = this->private; - - pthread_mutex_lock (&priv->req_lock); - list_add_tail (&stub->list, &priv->reqs); - pthread_mutex_unlock (&priv->req_lock); - - pthread_cond_signal (&priv->req_cond); -} - -#pragma generate - -char * -fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) -{ - fdl_private_t *priv = this->private; - int ret; - char * ptr = NULL; - - /* - * Use .jnl instead of .log so that we don't get test info (mistakenly) - * appended to our journal files. - */ - if (this->ctx->cmd_args.log_ident) { - ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", - priv->log_dir, this->ctx->cmd_args.log_ident, - obj->type, term); - } - else { - ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", - priv->log_dir, obj->type, term); - } - if ((ret <= 0) || !obj->path) { - gf_log (this->name, GF_LOG_ERROR, - "failed to construct log-file path"); - goto err; - } - - gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", - obj->path, obj->size); - - obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); - if (obj->fd < 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to open log file (%s)", strerror(errno)); - goto err; - } - -#if !defined(GF_BSD_HOST_OS) - /* - * NetBSD can just go die in a fire. Even though it claims to support - * fallocate/posix_fallocate they don't actually *do* anything so the - * file size remains zero. Then mmap succeeds anyway, but any access - * to the mmap'ed region will segfault. It would be acceptable for - * fallocate to do what it says, for mmap to fail, or for access to - * extend the file. NetBSD managed to hit the trifecta of Getting - * Everything Wrong, and debugging in that environment to get this far - * has already been painful enough (systems I worked on in 1990 were - * better that way). We'll fall through to the lseek/write method, and - * performance will be worse, and TOO BAD. - */ - if (sys_fallocate(obj->fd,0,0,obj->size) < 0) -#endif - { - gf_log (this->name, GF_LOG_WARNING, - "failed to fallocate space for log file"); - /* Have to do this the ugly page-faulty way. */ - (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); - (void) sys_write (obj->fd, "", 1); - } - - ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); - if (ptr == MAP_FAILED) { - gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", - strerror(errno)); - goto err; - } - - obj->ptr = ptr; - obj->max_offset = 0; - return ptr; - -err: - if (obj->fd >= 0) { - sys_close (obj->fd); - obj->fd = (-1); - } - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; - } - return ptr; -} - -void -fdl_close_term_log (xlator_t *this, log_obj_t *obj) -{ - fdl_private_t *priv = this->private; - - if (obj->ptr) { - (void) munmap (obj->ptr, obj->size); - obj->ptr = NULL; - } - - if (obj->fd >= 0) { - gf_log (this->name, GF_LOG_INFO, - "truncating term %d %s journal to %ld", - priv->term, obj->type, obj->max_offset); - if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to truncate journal (%s)", - strerror(errno)); - } - sys_close (obj->fd); - obj->fd = (-1); - } - - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; - } -} - -gf_boolean_t -fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) -{ - fdl_private_t *priv = this->private; - - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); - - ++(priv->term); - - *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!*meta_ptr) { - return _gf_false; - } - - *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!*data_ptr) { - return _gf_false; - } - - return _gf_true; -} - -void * -fdl_worker (void *arg) -{ - xlator_t *this = arg; - fdl_private_t *priv = this->private; - call_stub_t *stub; - char * meta_ptr = NULL; - off_t *meta_offset = &priv->meta_log.max_offset; - char * data_ptr = NULL; - off_t *data_offset = &priv->data_log.max_offset; - unsigned long base_as_ul; - void * msync_ptr; - size_t msync_len; - gf_boolean_t recycle; - void *err_label = &&err_unlocked; - - priv->meta_log.type = "meta"; - priv->meta_log.size = META_FILE_SIZE; - priv->meta_log.path = NULL; - priv->meta_log.fd = (-1); - priv->meta_log.ptr = NULL; - - priv->data_log.type = "data"; - priv->data_log.size = DATA_FILE_SIZE; - priv->data_log.path = NULL; - priv->data_log.fd = (-1); - priv->data_log.ptr = NULL; - - /* TBD: initial term should come from persistent storage (e.g. etcd) */ - priv->first_term = ++(priv->term); - meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!meta_ptr) { - goto *err_label; - } - data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!data_ptr) { - fdl_close_term_log (this, &priv->meta_log); - goto *err_label; - } - - for (;;) { - pthread_mutex_lock (&priv->req_lock); - err_label = &&err_locked; - while (list_empty(&priv->reqs)) { - pthread_cond_wait (&priv->req_cond, &priv->req_lock); - if (priv->should_stop) { - goto *err_label; - } - if (priv->change_term) { - if (!fdl_change_term(this, &meta_ptr, - &data_ptr)) { - goto *err_label; - } - priv->change_term = _gf_false; - continue; - } - } - stub = list_entry (priv->reqs.next, call_stub_t, list); - list_del_init (&stub->list); - pthread_mutex_unlock (&priv->req_lock); - err_label = &&err_unlocked; - /* - * TBD: batch requests - * - * What we should do here is gather up *all* of the requests - * that have accumulated since we were last at this point, - * blast them all out in one big writev, and then dispatch them - * all before coming back for more. That maximizes throughput, - * at some cost to latency (due to queuing effects at the log - * stage). Note that we're likely to be above io-threads, so - * the dispatch itself will be parallelized (at further cost to - * latency). For now, we just do the simplest thing and handle - * one request all the way through before fetching the next. - * - * So, why mmap/msync instead of writev/fdatasync? Because it's - * faster. Much faster. So much faster that I half-suspect - * cheating, but it's more convenient for now than having to - * ensure that everything's page-aligned for O_DIRECT (the only - * alternative that still might avoid ridiculous levels of - * local-FS overhead). - * - * TBD: check that msync really does get our data to disk. - */ - gf_log (this->name, GF_LOG_DEBUG, - "logging %u+%u bytes for op %d", - stub->jnl_meta_len, stub->jnl_data_len, stub->fop); - recycle = _gf_false; - if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { - recycle = _gf_true; - } - if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { - recycle = _gf_true; - } - if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { - goto *err_label; - } - meta_ptr = priv->meta_log.ptr; - data_ptr = priv->data_log.ptr; - gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", - meta_ptr + *meta_offset, data_ptr + *data_offset); - stub->serialize (stub, meta_ptr + *meta_offset, - data_ptr + *data_offset); - if (stub->jnl_meta_len > 0) { - base_as_ul = (unsigned long) (meta_ptr + *meta_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_meta_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request meta (%s)", - strerror(errno)); - } - *meta_offset += stub->jnl_meta_len; - } - if (stub->jnl_data_len > 0) { - base_as_ul = (unsigned long) (data_ptr + *data_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_data_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request data (%s)", - strerror(errno)); - } - *data_offset += stub->jnl_data_len; - } - call_resume (stub); - } - -err_locked: - pthread_mutex_unlock (&priv->req_lock); -err_unlocked: - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); - return NULL; -} - -int32_t -fdl_ipc_continue (call_frame_t *frame, xlator_t *this, - int32_t op, dict_t *xdata) -{ - /* - * Nothing to be done here. Just Unwind. * - */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, xdata); - - return 0; -} - -int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - call_stub_t *stub; - fdl_private_t *priv = this->private; - dict_t *tdict; - int32_t gt_err = EIO; - - switch (op) { - - case FDL_IPC_CHANGE_TERM: - gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); - priv->change_term = _gf_true; - pthread_cond_signal (&priv->req_cond); - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - break; - - case FDL_IPC_GET_TERMS: - gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); - tdict = dict_new (); - if (!tdict) { - gt_err = ENOMEM; - goto gt_done; - } - if (dict_set_int32(tdict,"first",priv->first_term) != 0) { - goto gt_done; - } - if (dict_set_int32(tdict,"last",priv->term) != 0) { - goto gt_done; - } - gt_err = 0; - gt_done: - if (gt_err) { - STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); - } else { - STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); - } - if (tdict) { - dict_unref (tdict); - } - break; - - case FDL_IPC_JBR_SERVER_ROLLBACK: - /* - * In case of a rollback from jbr-server, dump * - * the term and index number in the journal, * - * which will later be used to rollback the fop * - */ - stub = fop_ipc_stub (frame, fdl_ipc_continue, - op, xdata); - fdl_len_ipc (stub); - stub->serialize = fdl_serialize_ipc; - fdl_enqueue (this, stub); - - break; - - default: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - op, xdata); - } - - return 0; -} - -int -fdl_init (xlator_t *this) -{ - fdl_private_t *priv = NULL; - - priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); - if (!priv) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate fdl_private"); - goto err; - } - - INIT_LIST_HEAD (&priv->reqs); - if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_lock"); - goto err; - } - if (pthread_cond_init (&priv->req_cond, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_cond"); - goto err; - } - - GF_OPTION_INIT ("log-path", priv->log_dir, path, err); - - this->private = priv; - /* - * The rest of the fop table is automatically generated, so this is a - * bit cleaner than messing with the generation to add a hand-written - * exception. - */ - - if (gf_thread_create (&priv->worker, NULL, fdl_worker, this, - "fdlwrker") != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to start fdl_worker"); - goto err; - } - - return 0; - -err: - if (priv) { - GF_FREE(priv); - } - return -1; -} - -void -fdl_fini (xlator_t *this) -{ - fdl_private_t *priv = this->private; - - if (priv) { - priv->should_stop = _gf_true; - pthread_cond_signal (&priv->req_cond); - pthread_join (priv->worker, NULL); - GF_FREE(priv); - } -} - -int -fdl_reconfigure (xlator_t *this, dict_t *options) -{ - fdl_private_t *priv = this->private; - - GF_OPTION_RECONF ("log_dir", priv->log_dir, options, path, out); - /* TBD: react if it changed */ - -out: - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("fdl", this, out); - - ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } -out: - return ret; -} - -class_methods_t class_methods = { - .init = fdl_init, - .fini = fdl_fini, - .reconfigure = fdl_reconfigure, - .notify = default_notify, -}; - -struct volume_options options[] = { - { .key = {"log-path"}, - .type = GF_OPTION_TYPE_PATH, - .default_value = DEFAULT_LOG_FILE_DIRECTORY, - .description = "Directory for FDL files." - }, - { .key = {NULL} }, -}; - -struct xlator_cbks cbks = { - .release = default_release, - .releasedir = default_releasedir, - .forget = default_forget, -}; diff --git a/xlators/experimental/fdl/src/fdl.h b/xlators/experimental/fdl/src/fdl.h deleted file mode 100644 index 32e38c93f2d..00000000000 --- a/xlators/experimental/fdl/src/fdl.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _FDL_H_ -#define _FDL_H_ - -#define NEW_REQUEST (uint8_t)'N' - -typedef struct { - uint8_t event_type; /* e.g. NEW_REQUEST */ - uint8_t fop_type; /* e.g. GF_FOP_SETATTR */ - uint16_t request_id; - uint32_t ext_length; -} event_header_t; - -enum { - FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */ - FDL_IPC_CHANGE_TERM, - FDL_IPC_GET_TERMS, - FDL_IPC_JBR_SERVER_ROLLBACK -}; - -#endif /* _FDL_H_ */ diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py deleted file mode 100755 index 363ba6ce63f..00000000000 --- a/xlators/experimental/fdl/src/gen_dumper.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/python2 - -from __future__ import print_function -import os -import re -import sys - -curdir = os.path.dirname (sys.argv[0]) -gendir = os.path.join (curdir, '../../../../libglusterfs/src') -sys.path.append (gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# See the big header comment at the start of gen_fdl.py to see how the stages -# fit together. The big difference here is that *all* of the C code is in the -# template file as labelled fragments, instead of as Python strings. That -# makes it much easier to edit in one place, with proper syntax highlighting -# and indentation. -# -# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of -# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. -# -# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and -# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution -# in the middle of each function) is emitted immediately; the expanded CASE -# code is saved for the next stage. -# -# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code -# in the middle of EPILOG, to generate the whole output file. -# -# Another way of looking at it is to consider how the fragments appear in -# the final output: -# -# PROLOG -# FOP (expanded for CREATE) -# FOP before FUNCTION_BODY -# LOC, INTEGER, GFID, etc. (one per arg, by type) -# FOP after FUNCTION_BODY -# FOP (expanded for WRITEV) -# FOP before FUNCTION_BODY -# GFID, VECTOR, etc. (on per arg, by type) -# FOP after FUNCTION_BODY -# (more FOPs) -# EPILOG -# EPILOG before CASE -# CASE statements (one per fop) -# EPILOG after CASE - -typemap = { - 'dict_t *': ( "DICT", ""), - 'fd_t *': ( "GFID", ""), - 'dev_t': ( "DOUBLE", "%ld (0x%lx)"), - 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"), - 'int32_t': ( "INTEGER", "%d (0x%x)"), - 'mode_t': ( "INTEGER", "%d (0x%x)"), - 'off_t': ( "DOUBLE", "%ld (0x%lx)"), - 'size_t': ( "DOUBLE", "%ld (0x%lx)"), - 'uint32_t': ( "INTEGER", "%d (0x%x)"), - 'loc_t *': ( "LOC", ""), - 'const char *': ( "STRING", ""), - 'struct iovec *': ( "VECTOR", ""), - 'struct iatt *': ( "IATT", ""), -} - -def get_special_subs (args): - code = "" - for arg in args: - if (arg[0] != 'fop-arg') or (len(arg) < 4): - continue - recon_type, recon_fmt = typemap[arg[2]] - code += fragments[recon_type].replace("@ARGNAME@",arg[3]) \ - .replace("@FORMAT@",recon_fmt) - return code - -def gen_functions (): - code = "" - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value) - # Print the FOP fragment with @FUNCTION_BODY@ in the middle. - code += generate(fragments["FOP"],name,fop_subs) - return code - -def gen_cases (): - code = "" - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - # Add the CASE fragment for this fop. - code += generate(fragments["CASE"],name,fop_subs) - return code - -def load_fragments (path="recon-tmpl.c"): - pragma_re = re.compile('pragma fragment (.*)') - cur_symbol = None - cur_value = "" - result = {} - for line in open(path,"r").readlines(): - m = pragma_re.search(line) - if m: - if cur_symbol: - result[cur_symbol] = cur_value - cur_symbol = m.group(1) - cur_value = "" - else: - cur_value += line - if cur_symbol: - result[cur_symbol] = cur_value - return result - -if __name__ == "__main__": - fragments = load_fragments(sys.argv[1]) - print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") - print(fragments["PROLOG"]) - print(gen_functions()) - print(fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases())) - print("/* END GENERATED CODE */") diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py deleted file mode 100755 index d2e7dd5dfb2..00000000000 --- a/xlators/experimental/fdl/src/gen_fdl.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/python2 - -from __future__ import print_function -import os -import sys - -curdir = os.path.dirname (sys.argv[0]) -gendir = os.path.join (curdir, '../../../../libglusterfs/src') -sys.path.append (gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# Generation occurs in three stages. In this case, it actually makes more -# sense to discuss them in the *opposite* order of that in which they -# actually happen. -# -# Stage 3 is to insert all of the generated code into a file, replacing the -# "#pragma generate" that's already there. The file can thus contain all -# sorts of stuff that's not specific to one fop, either before or after the -# generated code as appropriate. -# -# Stage 2 is to generate all of the code *for a particular fop*, using a -# string-valued template plus a table of substitution values. Most of these -# are built in to the generator itself. However, we also add a couple that -# are specific to this particular translator - LEN_CODE and SER_CODE. These -# are per-fop functions to get the length or the contents (respectively) of -# what we'll put in the log. As with stage 3 allowing per-file boilerplate -# before and after generated code, this allows per-fop boilerplate before and -# after generated code. -# -# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for -# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS -# already are. We do this by looking at the fop-description table in the -# generator module, then doing out own template substitution to plug each -# specific argument name into another string-valued template. -# -# So, what does this leave us with in terms of variables and files? -# -# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE -# strings, which are used to generate the length and serialization code for -# each argument type. -# -# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_ -# prefix), which are used (along with the output from stage 1) to generate -# whole functions. -# -# For stage 3, we have a whole separate file (fdl_tmpl.c) into which we insert -# the collection of all functions defined in stage 2. - - -LEN_TEMPLATE = """ -void -fdl_len_@NAME@ (call_stub_t *stub) -{ - uint32_t meta_len = sizeof (event_header_t); - uint32_t data_len = 0; - - /* TBD: global stuff, e.g. uid/gid */ -@LEN_CODE@ - - /* TBD: pad extension length */ - stub->jnl_meta_len = meta_len; - stub->jnl_data_len = data_len; -} -""" - -SER_TEMPLATE = """ -void -fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf) -{ - event_header_t *eh; - unsigned long offset = 0; - - /* TBD: word size/endianness */ - eh = (event_header_t *)meta_buf; - eh->event_type = NEW_REQUEST; - eh->fop_type = GF_FOP_@UPNAME@; - eh->request_id = 0; // TBD - meta_buf += sizeof (*eh); -@SER_CODE@ - /* TBD: pad extension length */ - eh->ext_length = offset; -} -""" - -CBK_TEMPLATE = """ -int32_t -fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - return 0; -} -""" - -CONTINUE_TEMPLATE = """ -int32_t -fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - STACK_WIND (frame, fdl_@NAME@_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; -} - -""" - -FOP_TEMPLATE = """ -int32_t -fdl_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - call_stub_t *stub; - - stub = fop_@NAME@_stub (frame, default_@NAME@, - @SHORT_ARGS@); - fdl_len_@NAME@ (stub); - stub->serialize = fdl_serialize_@NAME@; - fdl_enqueue (this, stub); - - return 0; -} -""" - -LEN_DICT_TEMPLATE = """ - if (@SRC@) { - data_pair_t *memb; - for (memb = @SRC@->members_list; memb; memb = memb->next) { - meta_len += sizeof(int); - meta_len += strlen(memb->key) + 1; - meta_len += sizeof(int); - meta_len += memb->value->len; - } - } - meta_len += sizeof(int); -""" - -LEN_GFID_TEMPLATE = """ - meta_len += 16; -""" - -LEN_INTEGER_TEMPLATE = """ - meta_len += sizeof (@SRC@); -""" - -# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL -LEN_LOC_TEMPLATE = """ - if (@SRC@.name) { - meta_len += (strlen (@SRC@.name) + 34); - } else { - meta_len += 33; - } -""" - -LEN_STRING_TEMPLATE = """ - if (@SRC@) { - meta_len += (strlen (@SRC@) + 1); - } else { - meta_len += 1; - } -""" - -LEN_VECTOR_TEMPLATE = """ - meta_len += sizeof(size_t); - data_len += iov_length (@VEC@, @CNT@); -""" - -LEN_IATT_TEMPLATE = """ - meta_len += sizeof(@SRC@.ia_prot); - meta_len += sizeof(@SRC@.ia_uid); - meta_len += sizeof(@SRC@.ia_gid); - meta_len += sizeof(@SRC@.ia_atime); - meta_len += sizeof(@SRC@.ia_atime_nsec); - meta_len += sizeof(@SRC@.ia_mtime); - meta_len += sizeof(@SRC@.ia_mtime_nsec); -""" - -SERLZ_DICT_TEMPLATE = """ - if (@SRC@) { - data_pair_t *memb; - for (memb = @SRC@->members_list; memb; memb = memb->next) { - *((int *)(meta_buf+offset)) = strlen(memb->key) + 1; - offset += sizeof(int); - strcpy (meta_buf+offset, memb->key); - offset += strlen(memb->key) + 1; - *((int *)(meta_buf+offset)) = memb->value->len; - offset += sizeof(int); - memcpy (meta_buf+offset, memb->value->data, memb->value->len); - offset += memb->value->len; - } - } - *((int *)(meta_buf+offset)) = 0; - offset += sizeof(int); -""" - -SERLZ_GFID_TEMPLATE = """ - memcpy (meta_buf+offset, @SRC@->inode->gfid, 16); - offset += 16; -""" - -SERLZ_INTEGER_TEMPLATE = """ - memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@)); - offset += sizeof(@SRC@); -""" - -SERLZ_LOC_TEMPLATE = """ - memcpy (meta_buf+offset, @SRC@.gfid, 16); - offset += 16; - memcpy (meta_buf+offset, @SRC@.pargfid, 16); - offset += 16; - if (@SRC@.name) { - *(meta_buf+offset) = 1; - ++offset; - strcpy (meta_buf+offset, @SRC@.name); - offset += (strlen (@SRC@.name) + 1); - } else { - *(meta_buf+offset) = 0; - ++offset; - } -""" - -SERLZ_STRING_TEMPLATE = """ - if (@SRC@) { - *(meta_buf+offset) = 1; - ++offset; - strcpy (meta_buf+offset, @SRC@); - offset += strlen(@SRC@); - } else { - *(meta_buf+offset) = 0; - ++offset; - } -""" - -SERLZ_VECTOR_TEMPLATE = """ - *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@); - offset += sizeof(size_t); - int32_t i; - for (i = 0; i < @CNT@; ++i) { - memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len); - data_buf += @VEC@[i].iov_len; - } -""" - -# We don't need to save all of the fields - only those affected by chown, -# chgrp, chmod, and utime. -SERLZ_IATT_TEMPLATE = """ - *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot; - offset += sizeof(@SRC@.ia_prot); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid; - offset += sizeof(@SRC@.ia_uid); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid; - offset += sizeof(@SRC@.ia_gid); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime; - offset += sizeof(@SRC@.ia_atime); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec; - offset += sizeof(@SRC@.ia_atime_nsec); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime; - offset += sizeof(@SRC@.ia_mtime); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec; - offset += sizeof(@SRC@.ia_mtime_nsec); -""" - -typemap = { - 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE), - 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE), - 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE), - 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE), - 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE), -} - -def get_special_subs (args): - len_code = "" - ser_code = "" - for arg in args: - if (arg[0] != 'fop-arg') or (len(arg) < 4): - continue - # Let this throw an exception if we get an unknown field name. The - # broken build will remind whoever messed with the stub code that a - # corresponding update is needed here. - if arg[3] == "vector": - # Make it as obvious as possible that this is a special case. - len_code += LEN_VECTOR_TEMPLATE \ - .replace("@VEC@","stub->args.vector") \ - .replace("@CNT@","stub->args.count") - ser_code += SERLZ_VECTOR_TEMPLATE \ - .replace("@VEC@","stub->args.vector") \ - .replace("@CNT@","stub->args.count") - else: - len_tmpl, ser_tmpl = typemap[arg[2]] - src = "stub->args.%s" % arg[3] - len_code += len_tmpl.replace("@SRC@",src) - ser_code += ser_tmpl.replace("@SRC@",src) - return len_code, ser_code - -# Mention those fops in the selective_generate table, for which -# only a few common functions will be generated, and mention those -# functions. Rest of the functions can be customized -selective_generate = { - "ipc": "len,serialize", - } - -def gen_fdl (): - entrypoints = [] - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - - # generate all functions for all the fops - # except for the ones in selective_generate for which - # generate only the functions mentioned in the - # selective_generate table - gen_funcs = "len,serialize,callback,continue,fop" - if name in selective_generate: - gen_funcs = selective_generate[name].split(",") - - len_code, ser_code = get_special_subs(value) - fop_subs[name]["@LEN_CODE@"] = len_code[:-1] - fop_subs[name]["@SER_CODE@"] = ser_code[:-1] - if 'len' in gen_funcs: - print(generate(LEN_TEMPLATE,name,fop_subs)) - if 'serialize' in gen_funcs: - print(generate(SER_TEMPLATE,name,fop_subs)) - if name == 'writev': - print("#define DESTAGE_ASYNC") - if 'callback' in gen_funcs: - print(generate(CBK_TEMPLATE,name,cbk_subs)) - if 'continue' in gen_funcs: - print(generate(CONTINUE_TEMPLATE,name,fop_subs)) - if 'fop' in gen_funcs: - print(generate(FOP_TEMPLATE,name,fop_subs)) - if name == 'writev': - print("#undef DESTAGE_ASYNC") - entrypoints.append(name) - print("struct xlator_fops fops = {") - for ep in entrypoints: - print("\t.%s = fdl_%s," % (ep, ep)) - print("};") - -for l in open(sys.argv[1],'r').readlines(): - if l.find('#pragma generate') != -1: - print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") - gen_fdl() - print("/* END GENERATED CODE */") - else: - print(l[:-1]) diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py deleted file mode 100755 index db7f7bbc31c..00000000000 --- a/xlators/experimental/fdl/src/gen_recon.py +++ /dev/null @@ -1,218 +0,0 @@ -#!/usr/bin/python2 - -from __future__ import print_function -import os -import re -import string -import sys - -curdir = os.path.dirname (sys.argv[0]) -gendir = os.path.join (curdir, '../../../../libglusterfs/src') -sys.path.append (gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# See the big header comment at the start of gen_fdl.py to see how the stages -# fit together. The big difference here is that *all* of the C code is in the -# template file as labelled fragments, instead of as Python strings. That -# makes it much easier to edit in one place, with proper syntax highlighting -# and indentation. -# -# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of -# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. -# -# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and -# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution -# in the middle of each function) is emitted immediately; the expanded CASE -# code is saved for the next stage. -# -# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code -# in the middle of EPILOG, to generate the whole output file. -# -# Another way of looking at it is to consider how the fragments appear in -# the final output: -# -# PROLOG -# FOP (expanded for CREATE) -# FOP before FUNCTION_BODY -# LOC, INTEGER, GFID, etc. (one per arg, by type) -# FOP after FUNCTION_BODY -# FOP (expanded for WRITEV) -# FOP before FUNCTION_BODY -# GFID, VECTOR, etc. (one per arg, by type) -# FOP after FUNCTION_BODY -# (more FOPs) -# EPILOG -# EPILOG before CASE -# CASE statements (one per fop) -# EPILOG after CASE - -typemap = { - 'dict_t *': "DICT", - 'fd_t *': "FD", - 'dev_t': "DOUBLE", - 'gf_xattrop_flags_t': "INTEGER", - 'int32_t': "INTEGER", - 'mode_t': "INTEGER", - 'off_t': "DOUBLE", - 'size_t': "DOUBLE", - 'uint32_t': "INTEGER", - 'loc_t *': "LOC", - 'const char *': "STRING", - 'struct iovec *': "VECTOR", - 'struct iatt *': "IATT", - 'struct iobref *': "IOBREF", -} - -def get_special_subs (name, args, fop_type): - code = "" - cleanups = "" - links = "" - s_args = [] - for arg in args: - if arg[0] == 'extra': - code += "\t%s %s;\n\n" % (arg[2], arg[1]) - s_args.append(arg[3]) - continue - if arg[0] == 'link': - links += fragments["LINK"].replace("@INODE_ARG@",arg[1]) \ - .replace("@IATT_ARG@",arg[2]) - continue - if arg[0] != 'fop-arg': - continue - if (name, arg[1]) == ('writev', 'count'): - # Special case: just skip this. We can't mark it as 'nosync' - # because of the way the translator and dumper generators look for - # that after 'stub-name' which we don't define. Instead of adding a - # bunch of generic infrastructure for this one case, just pound it - # here. - continue - recon_type = typemap[arg[2]] - # print "/* %s.%s => %s (%s)*/" % (name, arg[1], recon_type, fop_type) - if (name == "create") and (arg[1] == "fd"): - # Special case: fd for create is new, not looked up. - # print "/* change to NEW_FD */" - recon_type = "NEW_FD" - elif (recon_type == "LOC") and (fop_type == "entry-op"): - # Need to treat this differently for inode vs. entry ops. - # Special case: link source is treated like inode-op. - if (name != "link") or (arg[1] != "oldloc"): - # print "/* change to PARENT_LOC */" - recon_type = "PARENT_LOC" - code += fragments[recon_type].replace("@ARGNAME@",arg[1]) \ - .replace("@ARGTYPE@",arg[2]) - cleanup_key = recon_type + "_CLEANUP" - if fragments.has_key(cleanup_key): - new_frag = fragments[cleanup_key].replace("@ARGNAME@",arg[1]) - # Make sure these get added in *reverse* order. Otherwise, a - # failure for an earlier argument might goto a label that falls - # through to the cleanup code for a variable associated with a - # later argument, but that variable might not even have been - # *declared* (let alone initialized) yet. Consider the following - # case. - # - # process argument A (on failure goto cleanup_A) - # set error label to cleanup_A - # - # declare pointer variable for argument B - # process argument B (on failure goto cleanup_B) - # - # cleanup_A: - # /* whatever */ - # cleanup_B: - # free pointer variable <= "USED BUT NOT SET" error here - # - # By adding these in reverse order, we ensure that cleanup_B is - # actually *before* cleanup_A, and nothing will try to do the free - # until we've actually attempted processing of B. - cleanups = new_frag + cleanups - if 'nosync' in arg[4:]: - code += "\t(void)%s;\n" % arg[1]; - continue - if arg[2] in ("loc_t *", "struct iatt *"): - # These are passed as pointers to the syncop, but they're actual - # structures in the generated code. - s_args.append("&"+arg[1]); - else: - s_args.append(arg[1]) - # We have to handle a couple of special cases here, because some n00b - # defined the syncops with a different argument order than the fops they're - # based on. - if name == 'writev': - # Swap 'flags' and 'iobref'. Also, we need to add the iov count, which - # is not stored in or read from the journal. There are other ways to - # do that, but this is the only place we need anything similar and we - # already have to treat it as a special case so this is simplest. - s_args_str = 'fd, &vector, 1, off, iobref, flags, xdata' - elif name == 'symlink': - # Swap 'linkpath' and 'loc'. - s_args_str = '&loc, linkpath, &iatt, xdata' - elif name == 'xattrop': - s_args_str = '&loc, flags, dict, xdata, NULL' - elif name == 'fxattrop': - s_args_str = 'fd, flags, dict, xdata, NULL' - else: - s_args_str = string.join (s_args, ", ") - return code, links, s_args_str, cleanups - -# TBD: probably need to generate type-specific cleanup code as well - e.g. -# fd_unref for an fd_t, loc_wipe for a loc_t, and so on. All of these -# generated CLEANUP fragments will go at the end of the function, with goto -# labels. Meanwhile, the error-checking part of each type-specific fragment -# (e.g. LOC or FD) will need to update the indirect label that we jump to when -# an error is detected. This will probably get messy. -def gen_functions (): - code = "" - for name, value in ops.iteritems(): - fop_type = [ x[1] for x in value if x[0] == "journal" ] - if not fop_type: - continue - body, links, syncop_args, cleanups = get_special_subs (name, value, - fop_type[0]) - fop_subs[name]["@FUNCTION_BODY@"] = body - fop_subs[name]["@LINKS@"] = links - fop_subs[name]["@SYNCOP_ARGS@"] = syncop_args - fop_subs[name]["@CLEANUPS@"] = cleanups - if name == "writev": - # Take advantage of the fact that, *during reconciliation*, the - # vector is always a single element. In normal I/O it's not. - fop_subs[name]["@SUCCESS_VALUE@"] = "vector.iov_len" - else: - fop_subs[name]["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS" - # Print the FOP fragment with @FUNCTION_BODY@ in the middle. - code += generate(fragments["FOP"],name,fop_subs) - return code - -def gen_cases (): - code = "" - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - # Add the CASE fragment for this fop. - code += generate(fragments["CASE"],name,fop_subs) - return code - -def load_fragments (path="recon-tmpl.c"): - pragma_re = re.compile('pragma fragment (.*)') - cur_symbol = None - cur_value = "" - result = {} - for line in open(path,"r").readlines(): - m = pragma_re.search(line) - if m: - if cur_symbol: - result[cur_symbol] = cur_value - cur_symbol = m.group(1) - cur_value = "" - else: - cur_value += line - if cur_symbol: - result[cur_symbol] = cur_value - return result - -if __name__ == "__main__": - fragments = load_fragments(sys.argv[1]) - print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") - print(fragments["PROLOG"]) - print(gen_functions()) - print(fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases())) - print("/* END GENERATED CODE */") diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c deleted file mode 100644 index 7c979c32a04..00000000000 --- a/xlators/experimental/fdl/src/logdump.c +++ /dev/null @@ -1,50 +0,0 @@ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/mman.h> - -extern int fdl_dump (char **, char **); - -int -main (int argc, char **argv) -{ - int meta_fd = (-1); - char *meta_buf = NULL; - int data_fd = (-1); - char *data_buf = NULL; - - meta_fd = open (argv[1], O_RDONLY); - if (meta_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); - if (meta_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - data_fd = open (argv[2], O_RDONLY); - if (data_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); - if (data_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - for (;;) { - if (!fdl_dump(&meta_buf,&data_buf)) { - break; - } - } - - return EXIT_SUCCESS; -} diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c deleted file mode 100644 index 228860401ae..00000000000 --- a/xlators/experimental/fdl/src/recon-tmpl.c +++ /dev/null @@ -1,304 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "iatt.h" -#include "syncop.h" -#include "xlator.h" -#include "glfs-internal.h" - -#include "fdl.h" - -#define GFAPI_SUCCESS 0 - -inode_t * -recon_get_inode (glfs_t *fs, uuid_t gfid) -{ - inode_t *inode; - loc_t loc = {NULL,}; - struct iatt iatt; - int ret; - inode_t *newinode; - - inode = inode_find (fs->active_subvol->itable, gfid); - if (inode) { - printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); - return inode; - } - - loc.inode = inode_new (fs->active_subvol->itable); - if (!loc.inode) { - return NULL; - } - gf_uuid_copy (loc.inode->gfid, gfid); - gf_uuid_copy (loc.gfid, gfid); - - printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); - - ret = syncop_lookup (fs->active_subvol, &loc, &iatt, - NULL, NULL, NULL); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "syncop_lookup failed (%d)\n", ret); - return NULL; - } - - newinode = inode_link (loc.inode, NULL, NULL, &iatt); - if (newinode) { - inode_lookup (newinode); - } - - return newinode; -} - -#pragma fragment DICT - dict_t *@ARGNAME@; - - @ARGNAME@ = dict_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - - { - int key_len, data_len; - char *key_ptr; - int garbage; - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - new_meta += sizeof(int); - garbage = dict_set_static_bin (@ARGNAME@, key_ptr, - new_meta, data_len); - /* TBD: check error from dict_set_static_bin */ - (void)garbage; - new_meta += data_len; - } - } - -#pragma fragment DICT_CLEANUP -cleanup_@ARGNAME@: - dict_unref (@ARGNAME@); - -#pragma fragment DOUBLE - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); - new_meta += sizeof(uint64_t); - -#pragma fragment FD - inode_t *@ARGNAME@_ino; - fd_t *@ARGNAME@; - - @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); - new_meta += 16; - if (!@ARGNAME@_ino) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@_ino; - - @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - -#pragma fragment FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); -cleanup_@ARGNAME@_ino: - inode_unref (@ARGNAME@_ino); - -#pragma fragment NEW_FD - /* - * This pseudo-type is only used for create, and in that case we know - * we'll be using loc.inode, so it's not worth generalizing to take an - * extra argument. - */ - fd_t *@ARGNAME@ = fd_anonymous (loc.inode); - - if (!fd) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - new_meta += 16; - -#pragma fragment NEW_FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); - -#pragma fragment INTEGER - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); - - new_meta += sizeof(@ARGTYPE@); - -#pragma fragment LOC - loc_t @ARGNAME@ = { NULL, }; - - @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.inode) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); - new_meta += 16; - new_meta += 16; /* skip over pargfid */ - if (*(new_meta++)) { - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; - } - -#pragma fragment LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); - -#pragma fragment PARENT_LOC - loc_t @ARGNAME@ = { NULL, }; - - new_meta += 16; /* skip over gfid */ - @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.parent) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); - new_meta += 16; - if (!*(new_meta++)) { - goto *err_label; - } - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; - - @ARGNAME@.inode = inode_new (fs->active_subvol->itable); - if (!@ARGNAME@.inode) { - goto *err_label; - } - -#pragma fragment PARENT_LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); - -#pragma fragment STRING - char *@ARGNAME@; - if (*(new_meta++)) { - @ARGNAME@ = new_meta; - new_meta += (strlen(new_meta) + 1); - } - else { - goto *err_label; - } - -#pragma fragment VECTOR - struct iovec @ARGNAME@; - - @ARGNAME@.iov_len = *((size_t *)new_meta); - new_meta += sizeof(@ARGNAME@.iov_len); - @ARGNAME@.iov_base = new_data; - new_data += @ARGNAME@.iov_len; - -#pragma fragment IATT - struct iatt @ARGNAME@; - { - @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - @ARGNAME@.ia_uid = myints[0]; - @ARGNAME@.ia_gid = myints[1]; - @ARGNAME@.ia_atime = myints[2]; - @ARGNAME@.ia_atime_nsec = myints[3]; - @ARGNAME@.ia_mtime = myints[4]; - @ARGNAME@.ia_mtime_nsec = myints[5]; - new_meta += sizeof(*myints) * 6; - } - -#pragma fragment IOBREF - struct iobref *@ARGNAME@; - - @ARGNAME@ = iobref_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - -#pragma fragment IOBREF_CLEANUP -cleanup_@ARGNAME@: - iobref_unref (@ARGNAME@); - -#pragma fragment LINK - /* TBD: check error */ - inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); - if (new_inode) { - inode_lookup (new_inode); - } - -#pragma fragment FOP -int -fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - int ret; - int status = 0xbad; - void *err_label = &&done; - -@FUNCTION_BODY@ - - ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); - if (ret != @SUCCESS_VALUE@) { - fprintf (stderr, "syncop_@NAME@ returned %d", ret); - goto *err_label; - } - -@LINKS@ - - status = 0; - -@CLEANUPS@ - -done: - *old_meta = new_meta; - *old_data = new_data; - return status; -} - -#pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { - goto done; - } - recognized = 1; - break; - -#pragma fragment EPILOG -int -recon_execute (glfs_t *fs, char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - int recognized = 0; - event_header_t *eh; - - eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); - - /* TBD: check event_type instead of assuming NEW_REQUEST */ - - switch (eh->fop_type) { -@SWITCH_BODY@ - - default: - printf ("unknown fop %u\n", eh->fop_type); - } - -done: - *old_meta = new_meta; - *old_data = new_data; - return recognized; -} diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c deleted file mode 100644 index 14168a011e0..00000000000 --- a/xlators/experimental/fdl/src/recon.c +++ /dev/null @@ -1,89 +0,0 @@ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/mman.h> - -#include "glusterfs.h" -#include "fd.h" -#include "syncop.h" -#include "glfs-internal.h" - -#define GFAPI_SUCCESS 0 - -extern int recon_execute (glfs_t *, char **, char **); - -int -main (int argc, char **argv) -{ - glfs_t *fs; - int ret; - int meta_fd = (-1); - char *meta_buf = NULL; - int data_fd = (-1); - char *data_buf = NULL; - - fs = glfs_new ("whocares"); - if (!fs) { - fprintf (stderr, "glfs_new failed\n"); - return EXIT_FAILURE; - } - - if (getenv("RECON_DEBUG")) { - ret = glfs_set_logging (fs, "/dev/stderr", 7); - } - else { - ret = glfs_set_logging (fs, "/dev/null", 0); - } - - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_set_logging failed (%d)\n", errno); - return EXIT_FAILURE; - } - - ret = glfs_set_volfile (fs, argv[1]); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno); - return EXIT_FAILURE; - } - - ret = glfs_init (fs); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_init failed (%d)\n", errno); - return EXIT_FAILURE; - } - - meta_fd = open (argv[2], O_RDONLY); - if (meta_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); - if (meta_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - data_fd = open (argv[3], O_RDONLY); - if (data_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); - if (data_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - for (;;) { - if (!recon_execute(fs,&meta_buf,&data_buf)) { - break; - } - } - - return EXIT_SUCCESS; -} diff --git a/xlators/experimental/jbr-client/Makefile.am b/xlators/experimental/jbr-client/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/jbr-client/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/jbr-client/src/Makefile.am b/xlators/experimental/jbr-client/src/Makefile.am deleted file mode 100644 index a894e69c8d7..00000000000 --- a/xlators/experimental/jbr-client/src/Makefile.am +++ /dev/null @@ -1,34 +0,0 @@ -if WITH_SERVER -xlator_LTLIBRARIES = jbrc.la -endif -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental - -nodist_jbrc_la_SOURCES = jbrc-cg.c -CLEANFILES = $(nodist_jbrc_la_SOURCES) - -jbrc_la_LDFLAGS = -module -avoid-version -jbrc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = $(top_srcdir)/xlators/lib/src/libxlator.h \ - $(top_srcdir)/glusterfsd/src/glusterfsd.h \ - jbrc.h jbr-messages.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/xlators/lib/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ - -I$(top_srcdir)/rpc/rpc-lib/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -JBRC_PREFIX = $(top_srcdir)/xlators/experimental/jbr-client/src -JBRC_GEN_FOPS = $(JBRC_PREFIX)/gen-fops.py -JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c -JBRC_WRAPPER = $(JBRC_PREFIX)/jbrc.c -noinst_PYTHON = $(JBRC_GEN_FOPS) -EXTRA_DIST = $(JBRC_TEMPLATES) $(JBRC_WRAPPER) - -jbrc-cg.c: $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER) - $(PYTHON) $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER) > $@ - -uninstall-local: - rm -f $(DESTDIR)$(xlatordir)/jbr.so diff --git a/xlators/experimental/jbr-client/src/fop-template.c b/xlators/experimental/jbr-client/src/fop-template.c deleted file mode 100644 index 7719f511f01..00000000000 --- a/xlators/experimental/jbr-client/src/fop-template.c +++ /dev/null @@ -1,113 +0,0 @@ -/* template-name fop */ -int32_t -jbrc_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbrc_local_t *local = NULL; - xlator_t *target_xl = ACTIVE_CHILD(this); - - local = mem_get(this->local_pool); - if (!local) { - goto err; - } - - local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; - } - local->curr_xl = target_xl; - local->scars = 0; - - frame->local = local; - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl, - target_xl, target_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; - -err: - if (local) { - mem_put(local); - } - STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM, - @ERROR_ARGS@); - return 0; -} - -/* template-name cbk */ -int32_t -jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - jbrc_local_t *local = frame->local; - xlator_t *last_xl = cookie; - xlator_t *next_xl; - jbrc_private_t *priv = this->private; - struct timespec spec; - - if (op_ret != (-1)) { - if (local->scars) { - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retried %p OK"), frame->local); - } - priv->active = last_xl; - goto unwind; - } - if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { - goto unwind; - } - - /* TBD: get leader ID from xdata? */ - next_xl = next_xlator(this, last_xl); - /* - * We can't just give up after we've tried all bricks, because it's - * quite likely that a new leader election just hasn't finished yet. - * We also shouldn't retry endlessly, and especially not at a high - * rate, but that's good enough while we work on other things. - * - * TBD: implement slow/finite retry via a worker thread - */ - if (!next_xl || (local->scars >= SCAR_LIMIT)) { - gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, - HILITE("ran out of retries for %p"), frame->local); - goto unwind; - } - - local->curr_xl = next_xl; - local->scars += 1; - spec.tv_sec = 1; - spec.tv_nsec = 0; - /* - * WARNING - * - * Just calling gf_timer_call_after like this leaves open the - * possibility that writes will get reordered, if a first write is - * rescheduled and then a second comes along to find an updated - * priv->active before the first actually executes. We might need to - * implement a stricter (and more complicated) queuing mechanism to - * ensure absolute consistency in this case. - */ - if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { - return 0; - } - -unwind: - call_stub_destroy(local->stub); - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - return 0; -} - -/* template-name cont-func */ -int32_t -jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbrc_local_t *local = frame->local; - - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl, - local->curr_xl, local->curr_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; -} diff --git a/xlators/experimental/jbr-client/src/gen-fops.py b/xlators/experimental/jbr-client/src/gen-fops.py deleted file mode 100755 index 9893e0c5968..00000000000 --- a/xlators/experimental/jbr-client/src/gen-fops.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/python2 - -from __future__ import print_function -import os -import re -import string -import sys - -curdir = os.path.dirname(sys.argv[0]) -gendir = os.path.join(curdir,'../../../../libglusterfs/src') -sys.path.append(gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# We really want the callback argument list, even when we're generating fop -# code, so we propagate here. -# TBD: this should probably be right in generate.py -for k, v in cbk_subs.iteritems(): - fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@'] - -# Stolen from old codegen.py -def load_templates (path): - templates = {} - tmpl_re = re.compile("/\* template-name (.*) \*/") - templates = {} - t_name = None - for line in open(path,"r").readlines(): - if not line: - break - m = tmpl_re.match(line) - if m: - if t_name: - templates[t_name] = string.join(t_contents,'') - t_name = m.group(1).strip() - t_contents = [] - elif t_name: - t_contents.append(line) - if t_name: - templates[t_name] = string.join(t_contents,'') - return templates - -# Stolen from gen_fdl.py -def gen_client (templates): - for name, value in ops.iteritems(): - if name == 'getspec': - # It's not real if it doesn't have a stub function. - continue - print(generate(templates['cbk'],name,cbk_subs)) - print(generate(templates['cont-func'],name,fop_subs)) - print(generate(templates['fop'],name,fop_subs)) - -tmpl = load_templates(sys.argv[1]) -for l in open(sys.argv[2],'r').readlines(): - if l.find('#pragma generate') != -1: - print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") - gen_client(tmpl) - print("/* END GENERATED CODE */") - else: - print(l[:-1]) diff --git a/xlators/experimental/jbr-client/src/jbr-messages.h b/xlators/experimental/jbr-client/src/jbr-messages.h deleted file mode 100644 index 9f210184d81..00000000000 --- a/xlators/experimental/jbr-client/src/jbr-messages.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _JBR_MESSAGES_H_ -#define _JBR_MESSAGES_H_ - -#include "glfs-message-id.h" - -/* To add new message IDs, append new identifiers at the end of the list. - * - * Never remove a message ID. If it's not used anymore, you can rename it or - * leave it as it is, but not delete it. This is to prevent reutilization of - * IDs by other messages. - * - * The component name must match one of the entries defined in - * glfs-message-id.h. - */ - -GLFS_MSGID(JBR, - J_MSG_INIT_FAIL, - J_MSG_RETRY_MSG, - J_MSG_MEM_ERR, - J_MSG_DICT_FLR, - J_MSG_GENERIC, - J_MSG_INVALID, - J_MSG_NO_DATA, - J_MSG_SYS_CALL_FAILURE, - J_MSG_QUORUM_NOT_MET, - J_MSG_LOCK_FAILURE -); - -#endif /* _JBR_MESSAGES_H_ */ diff --git a/xlators/experimental/jbr-client/src/jbrc.c b/xlators/experimental/jbr-client/src/jbrc.c deleted file mode 100644 index 9bb9346c5c0..00000000000 --- a/xlators/experimental/jbr-client/src/jbrc.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "call-stub.h" -#include "defaults.h" -#include "timer.h" -#include "xlator.h" -#include "jbr-messages.h" -#include "jbrc.h" -#include "statedump.h" - -#define SCAR_LIMIT 20 -#define HILITE(x) ("[1;33m"x"[0m") - -/* - * The fops are actually generated by gen-fops.py; the rest was mostly copied - * from defaults.c (commit cd253754 on 27 August 2013). - */ - -enum gf_dht_mem_types_ { - gf_mt_jbrc_private_t = gf_common_mt_end + 1, - gf_mt_jbrc_end -}; - -char *JBRC_XATTR = "user.jbr.active"; - -static inline -xlator_t * -ACTIVE_CHILD (xlator_t *parent) -{ - jbrc_private_t *priv = parent->private; - - return priv ? priv->active : FIRST_CHILD(parent); -} - -xlator_t * -next_xlator (xlator_t *this, xlator_t *prev) -{ - xlator_list_t *trav; - - for (trav = this->children; trav; trav = trav->next) { - if (trav->xlator == prev) { - return trav->next ? trav->next->xlator - : this->children->xlator; - } - } - - return NULL; -} - -void -jbrc_retry_cb (void *cb_arg) -{ - jbrc_local_t *local = cb_arg; - - gf_msg (__func__, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retrying %p"), local); - call_resume_wind(local->stub); -} - -#pragma generate - -int32_t -jbrc_forget (xlator_t *this, inode_t *inode) -{ - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement forget_cbk"); - return 0; -} - - -int32_t -jbrc_releasedir (xlator_t *this, fd_t *fd) -{ - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement releasedir_cbk"); - return 0; -} - -int32_t -jbrc_release (xlator_t *this, fd_t *fd) -{ - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement release_cbk"); - return 0; -} - -struct xlator_fops fops = { - .lookup = jbrc_lookup, - .stat = jbrc_stat, - .fstat = jbrc_fstat, - .truncate = jbrc_truncate, - .ftruncate = jbrc_ftruncate, - .access = jbrc_access, - .readlink = jbrc_readlink, - .mknod = jbrc_mknod, - .mkdir = jbrc_mkdir, - .unlink = jbrc_unlink, - .rmdir = jbrc_rmdir, - .symlink = jbrc_symlink, - .rename = jbrc_rename, - .link = jbrc_link, - .create = jbrc_create, - .open = jbrc_open, - .readv = jbrc_readv, - .writev = jbrc_writev, - .flush = jbrc_flush, - .fsync = jbrc_fsync, - .opendir = jbrc_opendir, - .readdir = jbrc_readdir, - .readdirp = jbrc_readdirp, - .fsyncdir = jbrc_fsyncdir, - .statfs = jbrc_statfs, - .setxattr = jbrc_setxattr, - .getxattr = jbrc_getxattr, - .fsetxattr = jbrc_fsetxattr, - .fgetxattr = jbrc_fgetxattr, - .removexattr = jbrc_removexattr, - .fremovexattr = jbrc_fremovexattr, - .lk = jbrc_lk, - .inodelk = jbrc_inodelk, - .finodelk = jbrc_finodelk, - .entrylk = jbrc_entrylk, - .fentrylk = jbrc_fentrylk, - .rchecksum = jbrc_rchecksum, - .xattrop = jbrc_xattrop, - .fxattrop = jbrc_fxattrop, - .setattr = jbrc_setattr, - .fsetattr = jbrc_fsetattr, - .fallocate = jbrc_fallocate, - .discard = jbrc_discard, -}; - -struct xlator_cbks cbks = { -}; - - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("jbrc", this, out); - - ret = xlator_mem_acct_init (this, gf_mt_jbrc_end + 1); - - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, - "Memory accounting init failed"); - return ret; - } -out: - return ret; -} - - -int32_t -jbrc_init (xlator_t *this) -{ - jbrc_private_t *priv = NULL; - xlator_list_t *trav = NULL; - - this->local_pool = mem_pool_new (jbrc_local_t, 128); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, - "failed to create jbrc_local_t pool"); - goto err; - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_mt_jbrc_private_t); - if (!priv) { - goto err; - } - - for (trav = this->children; trav; trav = trav->next) { - ++(priv->n_children); - } - - priv->active = FIRST_CHILD(this); - this->private = priv; - return 0; - -err: - if (priv) { - GF_FREE(priv); - } - return -1; -} - -void -jbrc_fini (xlator_t *this) -{ - GF_FREE(this->private); -} - -int -jbrc_get_child_index (xlator_t *this, xlator_t *kid) -{ - xlator_list_t *trav; - int retval = -1; - - for (trav = this->children; trav; trav = trav->next) { - ++retval; - if (trav->xlator == kid) { - return retval; - } - } - - return -1; -} - -uint8_t -jbrc_count_up_kids (jbrc_private_t *priv) -{ - uint8_t retval = 0; - uint8_t i; - - for (i = 0; i < priv->n_children; ++i) { - if (priv->kid_state & (1 << i)) { - ++retval; - } - } - - return retval; -} - -int32_t -jbrc_notify (xlator_t *this, int32_t event, void *data, ...) -{ - int32_t ret = 0; - int32_t index = 0; - jbrc_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO (THIS->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - switch (event) { - case GF_EVENT_CHILD_UP: - index = jbrc_get_child_index(this, data); - if (index >= 0) { - priv->kid_state |= (1 << index); - priv->up_children = jbrc_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_UP for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - } - ret = default_notify (this, event, data); - break; - case GF_EVENT_CHILD_DOWN: - index = jbrc_get_child_index(this, data); - if (index >= 0) { - priv->kid_state &= ~(1 << index); - priv->up_children = jbrc_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_DOWN for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - } - break; - default: - ret = default_notify (this, event, data); - } - -out: - return ret; -} - -int -jbrc_priv_dump (xlator_t *this) -{ - jbrc_private_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - xlator_list_t *trav = NULL; - int32_t i = -1; - - GF_VALIDATE_OR_GOTO (THIS->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", - this->type, this->name); - gf_proc_dump_add_section(key_prefix); - - gf_proc_dump_write("up_children", "%u", priv->up_children); - - for (trav = this->children, i = 0; trav; trav = trav->next, i++) { - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "child_%d", i); - gf_proc_dump_write(key_prefix, "%s", trav->xlator->name); - } - -out: - return 0; -} - -struct xlator_dumpops dumpops = { - .priv = jbrc_priv_dump, -}; - -class_methods_t class_methods = { - .init = jbrc_init, - .fini = jbrc_fini, - .notify = jbrc_notify, -}; - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/experimental/jbr-client/src/jbrc.h b/xlators/experimental/jbr-client/src/jbrc.h deleted file mode 100644 index c83259ca1bd..00000000000 --- a/xlators/experimental/jbr-client/src/jbrc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _JBRC_H_ -#define _JBRC_H_ - -typedef struct { - xlator_t *active; - uint8_t up_children; - uint8_t n_children; - uint32_t kid_state; -} jbrc_private_t; - -typedef struct { - call_stub_t *stub; - xlator_t *curr_xl; - uint16_t scars; -} jbrc_local_t; - -#endif /* _JBRC_H_ */ diff --git a/xlators/experimental/jbr-server/Makefile.am b/xlators/experimental/jbr-server/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/jbr-server/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am deleted file mode 100644 index fe1342dbaff..00000000000 --- a/xlators/experimental/jbr-server/src/Makefile.am +++ /dev/null @@ -1,39 +0,0 @@ -if WITH_SERVER -xlator_LTLIBRARIES = jbr.la -endif -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental - -nodist_jbr_la_SOURCES = jbr-cg.c -CLEANFILES = $(nodist_jbr_la_SOURCES) - -jbr_la_LDFLAGS = -module -avoid-version -jbr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/api/src/libgfapi.la - -noinst_HEADERS = jbr-internal.h \ - $(top_srcdir)/xlators/lib/src/libxlator.h \ - $(top_srcdir)/xlators/experimental/fdl/src/fdl.h \ - $(top_srcdir)/glusterfsd/src/glusterfsd.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ - -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(top_srcdir)/xlators/experimental/fdl/src/ \ - -DSBIN_DIR=\"$(sbindir)\" -I$(top_srcdir)/api/src \ - -DJBR_SCRIPT_PREFIX=\"$(jbrdir)\" \ - -I$(top_srcdir)/xlators/experimental/jbr-client/src/ - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -JBR_PREFIX = $(top_srcdir)/xlators/experimental/jbr-server/src -JBR_GEN_FOPS = $(JBR_PREFIX)/gen-fops.py -JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c -JBR_WRAPPER = $(JBR_PREFIX)/jbr.c -noinst_PYTHON = $(JBR_GEN_FOPS) -EXTRA_DIST = $(JBR_TEMPLATES) $(JBR_WRAPPER) - -jbr-cg.c: $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER) - $(PYTHON) $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER) > $@ - -uninstall-local: - rm -f $(DESTDIR)$(xlatordir)/jbr.so diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c deleted file mode 100644 index 9720442e63f..00000000000 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ /dev/null @@ -1,542 +0,0 @@ -/* - * You can put anything here - it doesn't even have to be a comment - and it - * will be ignored until we reach the first template-name comment. - */ - - -/* template-name read-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_private_t *priv = NULL; - gf_boolean_t in_recon = _gf_false; - int32_t op_errno = 0; - int32_t recon_term, recon_index; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - - op_errno = EREMOTE; - - /* allow reads during reconciliation * - * TBD: allow "dirty" reads on non-leaders * - */ - if (xdata && - (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && - (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { - in_recon = _gf_true; - } - - if ((!priv->leader) && (in_recon == _gf_false)) { - goto err; - } - - STACK_WIND (frame, default_@NAME@_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; - -err: - STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, - @ERROR_ARGS@); - return 0; -} - -/* template-name read-perform_local_op */ -/* No "perform_local_op" function needed for @NAME@ */ - -/* template-name read-dispatch */ -/* No "dispatch" function needed for @NAME@ */ - -/* template-name read-call_dispatch */ -/* No "call_dispatch" function needed for @NAME@ */ - -/* template-name read-fan-in */ -/* No "fan-in" function needed for @NAME@ */ - -/* template-name read-continue */ -/* No "continue" function needed for @NAME@ */ - -/* template-name read-complete */ -/* No "complete" function needed for @NAME@ */ - -/* template-name write-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - int op_errno = ENOMEM; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - -#if defined(JBR_CG_NEED_FD) - ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, fd); -#else - ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, NULL); -#endif - if (ret) - goto err; - - local = frame->local; - - /* - * If we let it through despite not being the leader, then we just want - * to pass it on down without all of the additional xattrs, queuing, and - * so on. However, jbr_*_complete does depend on the initialization - * immediately above this. - */ - if (!priv->leader) { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; - } - - ret = jbr_initialize_xdata_set_attrs (this, &xdata); - if (ret) - goto err; - - local->xdata = dict_ref(xdata); - local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; - } - - /* - * Can be used to just call_dispatch or be customised per fop to * - * perform ops specific to that particular fop. * - */ - ret = jbr_@NAME@_perform_local_op (frame, this, &op_errno, - @SHORT_ARGS@); - if (ret) - goto err; - - return ret; -err: - if (local) { - if (local->stub) { - call_stub_destroy(local->stub); - } - if (local->qstub) { - call_stub_destroy(local->qstub); - } - if (local->fd) { - fd_unref(local->fd); - } - mem_put(local); - } - STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, - @ERROR_ARGS@); - return 0; -} - -/* template-name write-perform_local_op */ -int32_t -jbr_@NAME@_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, - @LONG_ARGS@) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - - ret = jbr_@NAME@_call_dispatch (frame, this, op_errno, - @SHORT_ARGS@); - -out: - return ret; -} - -/* template-name write-call_dispatch */ -int32_t -jbr_@NAME@_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - -#if defined(JBR_CG_QUEUE) - jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); - if (!ictx) { - *op_errno = EIO; - goto out; - } - - LOCK(&ictx->lock); - if (ictx->active) { - gf_msg_debug (this->name, 0, - "queuing request due to conflict"); - /* - * TBD: enqueue only for real conflict - * - * Currently we just act like all writes are in - * conflict with one another. What we should really do - * is check the active/pending queues and defer only if - * there's a conflict there. - * - * It's important to check the pending queue because we - * might have an active request X which conflicts with - * a pending request Y, and this request Z might - * conflict with Y but not X. If we checked only the - * active queue then Z could jump ahead of Y, which - * would be incorrect. - */ - local->qstub = fop_@NAME@_stub (frame, - jbr_@NAME@_dispatch, - @SHORT_ARGS@); - if (!local->qstub) { - UNLOCK(&ictx->lock); - goto out; - } - list_add_tail(&local->qlinks, &ictx->pqueue); - ++(ictx->pending); - UNLOCK(&ictx->lock); - ret = 0; - goto out; - } else { - list_add_tail(&local->qlinks, &ictx->aqueue); - ++(ictx->active); - } - UNLOCK(&ictx->lock); -#endif - ret = jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@); - -out: - return ret; -} - -/* template-name write-dispatch */ -int32_t -jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - xlator_list_t *trav; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - /* - * TBD: unblock pending request(s) if we fail after this point but - * before we get to jbr_@NAME@_complete (where that code currently - * resides). - */ - - local->call_count = priv->n_children - 1; - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND (frame, jbr_@NAME@_fan_in, - trav->xlator, trav->xlator->fops->@NAME@, - @SHORT_ARGS@); - } - - /* TBD: variable Issue count */ - ret = 0; -out: - return ret; -} - -/* template-name write-fan-in */ -int32_t -jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - int32_t ret = -1; - uint8_t call_count; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); - - LOCK(&frame->lock); - call_count = --(local->call_count); - if (op_ret != -1) { - /* Increment the number of successful acks * - * received for the operation. * - */ - (local->successful_acks)++; - local->successful_op_ret = op_ret; - } - gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", - op_ret, op_errno, local->successful_acks); - UNLOCK(&frame->lock); - - /* TBD: variable Completion count */ - if (call_count == 0) { - call_resume(local->stub); - } - - ret = 0; -out: - return ret; -} - -/* template-name write-continue */ -int32_t -jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - jbr_private_t *priv = NULL; - int32_t op_errno = 0; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - - /* Perform quorum check to see if the leader needs * - * to perform the operation. If the operation will not * - * meet quorum irrespective of the leader's result * - * there is no point in the leader performing the fop * - */ - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " - "to meet quorum. Failing the operation without trying " - "it on the leader."); - -#if defined(JBR_CG_QUEUE) - /* - * In case of a fop failure, before unwinding need to * - * remove it from queue * - */ - ret = jbr_remove_from_queue (frame, this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_GENERIC, "Failed to remove from queue."); - } -#endif - - /* - * In this case, the quorum is not met on the followers * - * So the operation will not be performed on the leader * - * and a rollback will be sent via GF_FOP_IPC to all the * - * followers, where this particular fop's term and index * - * numbers will be journaled, and later used to rollback * - */ - call_frame_t *new_frame; - - new_frame = copy_frame (frame); - - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - ret = dict_set_int32 (local->xdata, - "rollback-fop", - GF_FOP_@UPNAME@); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set rollback-fop"); - } else { - new_local->xdata = dict_ref(local->xdata); - new_frame->local = new_local; - jbr_ipc_call_dispatch (new_frame, - this, &op_errno, - FDL_IPC_JBR_SERVER_ROLLBACK, - new_local->xdata); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not create local for new_frame"); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not send rollback ipc"); - } - - STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, - @ERROR_ARGS@); - } else { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - } - -out: - return 0; -} - -/* template-name write-complete */ -int32_t -jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_private_t *priv = NULL; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, local, err); - - /* If the fop failed on the leader, then reduce one succesful ack - * before calculating the fop quorum - */ - LOCK(&frame->lock); - if (op_ret == -1) - (local->successful_acks)--; - UNLOCK(&frame->lock); - -#if defined(JBR_CG_QUEUE) - ret = jbr_remove_from_queue (frame, this); - if (ret) - goto err; -#endif - -#if defined(JBR_CG_FSYNC) - jbr_mark_fd_dirty(this, local); -#endif - -#if defined(JBR_CG_NEED_FD) - fd_unref(local->fd); -#endif - - /* After the leader completes the fop, a quorum check is * - * performed, taking into account the outcome of the fop * - * on the leader. Irrespective of the fop being successful * - * or failing on the leader, the result of the quorum will * - * determine if the overall fop is successful or not. For * - * example, a fop might have succeeded on every node except * - * the leader, in which case as quorum is being met, the fop * - * will be treated as a successful fop, even though it failed * - * on the leader. On follower nodes, no quorum check should * - * be done, and the result is returned to the leader as is. * - */ - if (priv->leader) { - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - op_ret = -1; - op_errno = EROFS; - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Quorum is not met. " - "The operation has failed."); - /* - * In this case, the quorum is not met after the * - * operation is performed on the leader. Hence a * - * rollback will be sent via GF_FOP_IPC to the leader * - * where this particular fop's term and index numbers * - * will be journaled, and later used to rollback. * - * The same will be done on all the followers * - */ - call_frame_t *new_frame; - - new_frame = copy_frame (frame); - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "op = %d", - new_frame->op); - ret = dict_set_int32 (local->xdata, - "rollback-fop", - GF_FOP_@UPNAME@); - if (ret) { - gf_msg (this->name, - GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set " - "rollback-fop"); - } else { - new_local->xdata = dict_ref (local->xdata); - new_frame->local = new_local; - /* - * Calling STACK_WIND instead * - * of jbr_ipc as it will not * - * unwind to the previous * - * translators like it will * - * in case of jbr_ipc. * - */ - STACK_WIND (new_frame, - jbr_ipc_complete, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - FDL_IPC_JBR_SERVER_ROLLBACK, - new_local->xdata); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not create local " - "for new_frame"); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not send rollback ipc"); - } - } else { -#if defined(JBR_CG_NEED_FD) - op_ret = local->successful_op_ret; -#else - op_ret = 0; -#endif - op_errno = 0; - gf_msg_debug (this->name, 0, - "Quorum has met. The operation has succeeded."); - } - } - - /* - * Unrefing the reference taken in jbr_@NAME@ () * - */ - dict_unref (local->xdata); - - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - - - return 0; - -err: - STACK_UNWIND_STRICT (@NAME@, frame, -1, 0, - @SHORT_ARGS@); - - return 0; -} diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py deleted file mode 100755 index e728f473372..00000000000 --- a/xlators/experimental/jbr-server/src/gen-fops.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/python2 - -# This script generates the boilerplate versions of most fops and cbks in the -# server. This allows the details of leadership-status checking, sequencing -# between leader and followers (including fan-out), and basic error checking -# to be centralized one place, with per-operation code kept to a minimum. - -from __future__ import print_function -import os -import re -import string -import sys - -curdir = os.path.dirname(sys.argv[0]) -gendir = os.path.join(curdir,'../../../../libglusterfs/src') -sys.path.append(gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# We really want the callback argument list, even when we're generating fop -# code, so we propagate here. -# TBD: this should probably be right in generate.py -for k, v in cbk_subs.iteritems(): - fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@'] - -# Stolen from old codegen.py -def load_templates (path): - templates = {} - tmpl_re = re.compile("/\* template-name (.*) \*/") - templates = {} - t_name = None - for line in open(path,"r").readlines(): - if not line: - break - m = tmpl_re.match(line) - if m: - if t_name: - templates[t_name] = string.join(t_contents,'') - t_name = m.group(1).strip() - t_contents = [] - elif t_name: - t_contents.append(line) - if t_name: - templates[t_name] = string.join(t_contents,'') - return templates - -# We need two types of templates. The first, for pure read operations, just -# needs to do a simple am-i-leader check (augmented to allow dirty reads). -# The second, for pure writes, needs to do fan-out to followers between those -# initial checks and local execution. There are other operations that don't -# fit neatly into either category - e.g. lock ops or fsync - so we'll just have -# to handle those manually. The table thus includes entries only for those we -# can categorize. The special cases, plus any new operations we've never even -# heard of, aren't in there. -# -# Various keywords can be used to define/undefine preprocessor symbols used -# in the templates, on a per-function basis. For example, if the keyword here -# is "fsync" (lowercase word or abbreviation) that will cause JBR_CG_FSYNC -# (prefix plus uppercase version) to be defined above all of the generated code -# for that fop. - -fop_table = { - "access": "read", - "create": "write", - "discard": "write", -# "entrylk": "read", - "fallocate": "write", -# "fentrylk": "read", - "fgetxattr": "read", -# "finodelk": "read", -# "flush": "read", - "fremovexattr": "write", - "fsetattr": "write", - "fsetxattr": "write", - "fstat": "read", -# "fsync": "read", -# "fsyncdir": "read", - "ftruncate": "write", - "fxattrop": "write", - "getxattr": "read", -# "inodelk": "read", - "link": "write", - "lk": "write,queue", -# "lookup": "read", - "mkdir": "write", - "mknod": "write", - "open": "write", - "opendir": "read", - "rchecksum": "read", - "readdir": "read", - "readdirp": "read", - "readlink": "read", - "readv": "read", - "removexattr": "write", - "rename": "write", - "rmdir": "write", - "setattr": "write", - "setxattr": "write", - "stat": "read", - "statfs": "read", - "symlink": "write", - "truncate": "write", - "unlink": "write", - "writev": "write,fsync,queue", - "xattrop": "write", - "ipc": "write", -} - -# Mention those fops in the selective_generate table, for which -# only a few common functions will be generated, and mention those -# functions. Rest of the functions can be customized -selective_generate = { - "lk": "fop,dispatch,call_dispatch", - "ipc": "dispatch,call_dispatch", -} - -# Stolen from gen_fdl.py -def gen_server (templates): - fops_done = [] - for name in fop_table.keys(): - info = fop_table[name].split(",") - kind = info[0] - flags = info[1:] - - # generate all functions for the fops in fop_table - # except for the ones in selective_generate for which - # generate only the functions mentioned in the - # selective_generate table - gen_funcs = "fop,complete,continue,fan-in,dispatch, \ - call_dispatch,perform_local_op" - if name in selective_generate: - gen_funcs = selective_generate[name].split(",") - - if ("fsync" in flags) or ("queue" in flags): - flags.append("need_fd") - for fname in flags: - print("#define JBR_CG_%s" % fname.upper()) - - if 'complete' in gen_funcs: - print(generate(templates[kind+"-complete"], - name,cbk_subs)) - - if 'continue' in gen_funcs: - print(generate(templates[kind+"-continue"], - name,fop_subs)) - - if 'fan-in' in gen_funcs: - print(generate(templates[kind+"-fan-in"], - name,cbk_subs)) - - if 'dispatch' in gen_funcs: - print(generate(templates[kind+"-dispatch"], - name,fop_subs)) - - if 'call_dispatch' in gen_funcs: - print(generate(templates[kind+"-call_dispatch"], - name,fop_subs)) - - if 'perform_local_op' in gen_funcs: - print(generate(templates[kind+"-perform_local_op"], - name, fop_subs)) - - if 'fop' in gen_funcs: - print(generate(templates[kind+"-fop"],name,fop_subs)) - - for fname in flags: - print("#undef JBR_CG_%s" % fname.upper()) - fops_done.append(name) - # Just for fun, emit the fops table too. - print("struct xlator_fops fops = {") - for x in fops_done: - print((" .%s = jbr_%s,"%(x,x))) - print("};") - -tmpl = load_templates(sys.argv[1]) -for l in open(sys.argv[2],'r').readlines(): - if l.find('#pragma generate') != -1: - print("/* BEGIN GENERATED CODE - DO NOT MODIFY */") - gen_server(tmpl) - print("/* END GENERATED CODE */") - else: - print(l[:-1]) diff --git a/xlators/experimental/jbr-server/src/jbr-internal.h b/xlators/experimental/jbr-server/src/jbr-internal.h deleted file mode 100644 index 46a29910d1f..00000000000 --- a/xlators/experimental/jbr-server/src/jbr-internal.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include <sys/stat.h> -#include <sys/types.h> - -#define LEADER_XATTR "user.jbr.leader" -#define SECOND_CHILD(xl) (xl->children->next->xlator) -#define RECONCILER_PATH JBR_SCRIPT_PREFIX"/reconciler.py" -#define CHANGELOG_ENTRY_SIZE 128 - -enum { - gf_mt_jbr_private_t = gf_common_mt_end + 1, - gf_mt_jbr_fd_ctx_t, - gf_mt_jbr_inode_ctx_t, - gf_mt_jbr_dirty_t, - gf_mt_jbr_end -}; - -typedef enum jbr_recon_notify_ev_id_t { - JBR_RECON_SET_LEADER = 1, - JBR_RECON_ADD_CHILD = 2 -} jbr_recon_notify_ev_id_t; - -typedef struct _jbr_recon_notify_ev_s { - jbr_recon_notify_ev_id_t id; - uint32_t index; /* in case of add */ - struct list_head list; -} jbr_recon_notify_ev_t; - -typedef struct { - /* - * This is a hack to allow a non-leader to accept requests while the - * leader is down, and it only works for n=2. The way it works is that - * "config_leader" indicates the state from our options (via init or - * reconfigure) but "leader" is what the fop code actually looks at. If - * config_leader is true, then leader will *always* be true as well, - * giving that brick precedence. If config_leader is false, then - * leader will only be true if there is no connection to the other - * brick (tracked in jbr_notify). - * - * TBD: implement real leader election - */ - gf_boolean_t config_leader; - gf_boolean_t leader; - uint8_t up_children; - uint8_t n_children; - char *vol_file; - uint32_t current_term; - uint32_t kid_state; - gf_lock_t dirty_lock; - struct list_head dirty_fds; - uint32_t index; - gf_lock_t index_lock; - double quorum_pct; - int term_fd; - long term_total; - long term_read; - /* - * This is a super-duper hack, but it will do for now. The reason it's - * a hack is that we pass this to dict_set_static_bin, so we don't have - * to mess around with allocating and freeing it on every single IPC - * request, but it's totally not thread-safe. On the other hand, there - * should only be one reconciliation thread running and calling these - * functions at a time, so maybe that doesn't matter. - * - * TBD: re-evaluate how to manage this - */ - char term_buf[CHANGELOG_ENTRY_SIZE]; - gf_boolean_t child_up; /* To maintain the state of * - * the translator */ -} jbr_private_t; - -typedef struct { - call_stub_t *stub; - call_stub_t *qstub; - uint32_t call_count; - uint32_t successful_acks; - uint32_t successful_op_ret; - fd_t *fd; - struct list_head qlinks; - dict_t *xdata; -} jbr_local_t; - -/* - * This should match whatever changelog returns on the pre-op for us to pass - * when we're ready for our post-op. - */ -typedef uint32_t log_id_t; - -typedef struct { - struct list_head links; - log_id_t id; -} jbr_dirty_list_t; - -typedef struct { - fd_t *fd; - struct list_head dirty_list; - struct list_head fd_list; -} jbr_fd_ctx_t; - -typedef struct { - gf_lock_t lock; - uint32_t active; - struct list_head aqueue; - uint32_t pending; - struct list_head pqueue; -} jbr_inode_ctx_t; - -void jbr_start_reconciler (xlator_t *this); diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c deleted file mode 100644 index 151ba57ab4c..00000000000 --- a/xlators/experimental/jbr-server/src/jbr.c +++ /dev/null @@ -1,1749 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <fnmatch.h> -#include "call-stub.h" -#include "defaults.h" -#include "xlator.h" -#include "glfs.h" -#include "glfs-internal.h" -#include "run.h" -#include "common-utils.h" -#include "syncop.h" -#include "syscall.h" -#include "compat-errno.h" -#include "fdl.h" - -#include "jbr-internal.h" -#include "jbr-messages.h" - -#define JBR_FLUSH_INTERVAL 5 - -enum { - /* echo "cluster/jbr-server" | md5sum | cut -c 1-8 */ - JBR_SERVER_IPC_BASE = 0x0e2d66a5, - JBR_SERVER_TERM_RANGE, - JBR_SERVER_OPEN_TERM, - JBR_SERVER_NEXT_ENTRY -}; - -/* - * Need to declare jbr_lk_call_dispatch as jbr_lk_continue and * - * jbr_lk_perform_local_op call it, before code is generated. * - */ -int32_t -jbr_lk_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, - fd_t *fd, int32_t cmd, struct gf_flock *lock, - dict_t *xdata); - -int32_t -jbr_lk_dispatch (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *lock, - dict_t *xdata); - -int32_t -jbr_ipc_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, - int32_t op, dict_t *xdata); - -int32_t -jbr_ipc_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - dict_t *xdata); - -/* Used to check the quorum of acks received after the fop - * confirming the status of the fop on all the brick processes - * for this particular subvolume - */ -gf_boolean_t -fop_quorum_check (xlator_t *this, double n_children, - double current_state) -{ - jbr_private_t *priv = NULL; - gf_boolean_t result = _gf_false; - double required = 0; - double current = 0; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - required = n_children * priv->quorum_pct; - - /* - * Before performing the fop on the leader, we need to check, - * if there is any merit in performing the fop on the leader. - * In a case, where even a successful write on the leader, will - * not meet quorum, there is no point in trying the fop on the - * leader. - * When this function is called after the leader has tried - * performing the fop, this check will calculate quorum taking into - * account the status of the fop on the leader. If the leader's - * op_ret was -1, the complete function would account that by - * decrementing successful_acks by 1 - */ - - current = current_state * 100.0; - - if (current < required) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_QUORUM_NOT_MET, - "Quorum not met. quorum_pct = %f " - "Current State = %f, Required State = %f", - priv->quorum_pct, current, - required); - } else - result = _gf_true; - -out: - return result; -} - -jbr_inode_ctx_t * -jbr_get_inode_ctx (xlator_t *this, inode_t *inode) -{ - uint64_t ctx_int = 0LL; - jbr_inode_ctx_t *ctx_ptr; - - if (__inode_ctx_get(inode, this, &ctx_int) == 0) { - ctx_ptr = (jbr_inode_ctx_t *)(long)ctx_int; - } else { - ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), - gf_mt_jbr_inode_ctx_t); - if (ctx_ptr) { - ctx_int = (uint64_t)(long)ctx_ptr; - if (__inode_ctx_set(inode, this, &ctx_int) == 0) { - LOCK_INIT(&ctx_ptr->lock); - INIT_LIST_HEAD(&ctx_ptr->aqueue); - INIT_LIST_HEAD(&ctx_ptr->pqueue); - } else { - GF_FREE(ctx_ptr); - ctx_ptr = NULL; - } - } - - } - - return ctx_ptr; -} - -jbr_fd_ctx_t * -jbr_get_fd_ctx (xlator_t *this, fd_t *fd) -{ - uint64_t ctx_int = 0LL; - jbr_fd_ctx_t *ctx_ptr; - - if (__fd_ctx_get(fd, this, &ctx_int) == 0) { - ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; - } else { - ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), gf_mt_jbr_fd_ctx_t); - if (ctx_ptr) { - if (__fd_ctx_set(fd, this, (uint64_t)ctx_ptr) == 0) { - INIT_LIST_HEAD(&ctx_ptr->dirty_list); - INIT_LIST_HEAD(&ctx_ptr->fd_list); - } else { - GF_FREE(ctx_ptr); - ctx_ptr = NULL; - } - } - - } - - return ctx_ptr; -} - -void -jbr_mark_fd_dirty (xlator_t *this, jbr_local_t *local) -{ - fd_t *fd = local->fd; - jbr_fd_ctx_t *ctx_ptr; - jbr_dirty_list_t *dirty; - jbr_private_t *priv = this->private; - - /* - * TBD: don't do any of this for O_SYNC/O_DIRECT writes. - * Unfortunately, that optimization requires that we distinguish - * between writev and other "write" calls, saving the original flags - * and checking them in the callback. Too much work for too little - * gain right now. - */ - - LOCK(&fd->lock); - ctx_ptr = jbr_get_fd_ctx(this, fd); - dirty = GF_CALLOC(1, sizeof(*dirty), gf_mt_jbr_dirty_t); - if (ctx_ptr && dirty) { - gf_msg_trace (this->name, 0, - "marking fd %p as dirty (%p)", fd, dirty); - /* TBD: fill dirty->id from what changelog gave us */ - list_add_tail(&dirty->links, &ctx_ptr->dirty_list); - if (list_empty(&ctx_ptr->fd_list)) { - /* Add a ref so _release doesn't get called. */ - ctx_ptr->fd = fd_ref(fd); - LOCK(&priv->dirty_lock); - list_add_tail (&ctx_ptr->fd_list, - &priv->dirty_fds); - UNLOCK(&priv->dirty_lock); - } - } else { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - J_MSG_MEM_ERR, "could not mark %p dirty", fd); - if (ctx_ptr) { - GF_FREE(ctx_ptr); - } - if (dirty) { - GF_FREE(dirty); - } - } - UNLOCK(&fd->lock); -} - -#define JBR_TERM_XATTR "trusted.jbr.term" -#define JBR_INDEX_XATTR "trusted.jbr.index" -#define JBR_REP_COUNT_XATTR "trusted.jbr.rep-count" -#define RECON_TERM_XATTR "trusted.jbr.recon-term" -#define RECON_INDEX_XATTR "trusted.jbr.recon-index" - -int32_t -jbr_leader_checks_and_init (call_frame_t *frame, xlator_t *this, int *op_errno, - dict_t *xdata, fd_t *fd) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - gf_boolean_t result = _gf_false; - int from_leader = _gf_false; - int from_recon = _gf_false; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - - /* - * Our first goal here is to avoid "split brain surprise" for users who - * specify exactly 50% with two- or three-way replication. That means - * either a more-than check against half the total replicas or an - * at-least check against half of our peers (one less). Of the two, - * only an at-least check supports the intuitive use of 100% to mean - * all replicas must be present, because "more than 100%" will never - * succeed regardless of which count we use. This leaves us with a - * slightly non-traditional definition of quorum ("at least X% of peers - * not including ourselves") but one that's useful enough to be worth - * it. - * - * Note that n_children and up_children *do* include the local - * subvolume, so we need to subtract one in each case. - */ - if (priv->leader) { - result = fop_quorum_check (this, (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - - if (result == _gf_false) { - /* Emulate the AFR client-side-quorum behavior. */ - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Sufficient number of " - "subvolumes are not up to meet quorum."); - *op_errno = EROFS; - goto out; - } - } else { - if (xdata) { - from_leader = !!dict_get(xdata, JBR_TERM_XATTR); - from_recon = !!dict_get(xdata, RECON_TERM_XATTR) - && !!dict_get(xdata, RECON_INDEX_XATTR); - } else { - from_leader = from_recon = _gf_false; - } - - /* follower/recon path * - * just send it to local node * - */ - if (!from_leader && !from_recon) { - *op_errno = EREMOTE; - goto out; - } - } - - local = mem_get0(this->local_pool); - if (!local) { - goto out; - } - - if (fd) - local->fd = fd_ref(fd); - else - local->fd = NULL; - - INIT_LIST_HEAD(&local->qlinks); - local->successful_acks = 0; - frame->local = local; - - ret = 0; -out: - return ret; -} - -int32_t -jbr_initialize_xdata_set_attrs (xlator_t *this, dict_t **xdata) -{ - jbr_private_t *priv = NULL; - int32_t ret = -1; - uint32_t ti = 0; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, xdata, out); - - if (!*xdata) { - *xdata = dict_new(); - if (!*xdata) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - J_MSG_MEM_ERR, "failed to allocate xdata"); - goto out; - } - } - - if (dict_set_int32(*xdata, JBR_TERM_XATTR, priv->current_term) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to set jbr-term"); - goto out; - } - - LOCK(&priv->index_lock); - ti = ++(priv->index); - UNLOCK(&priv->index_lock); - if (dict_set_int32(*xdata, JBR_INDEX_XATTR, ti) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to set index"); - goto out; - } - - ret = 0; -out: - return ret; -} - -int32_t -jbr_remove_from_queue (call_frame_t *frame, xlator_t *this) -{ - int32_t ret = -1; - jbr_inode_ctx_t *ictx = NULL; - jbr_local_t *local = NULL; - jbr_local_t *next = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - if (local->qlinks.next != &local->qlinks) { - list_del(&local->qlinks); - ictx = jbr_get_inode_ctx(this, local->fd->inode); - if (ictx) { - LOCK(&ictx->lock); - if (ictx->pending) { - /* - * TBD: dequeue *all* non-conflicting - * reqs - * - * With the stub implementation there - * can only be one request active at a - * time (zero here) so it's not an - * issue. In a real implementation - * there might still be other active - * requests to check against, and - * multiple pending requests that could - * continue. - */ - gf_msg_debug (this->name, 0, - "unblocking next request"); - --(ictx->pending); - next = list_entry (ictx->pqueue.next, - jbr_local_t, qlinks); - list_del(&next->qlinks); - list_add_tail(&next->qlinks, - &ictx->aqueue); - call_resume(next->qstub); - } else { - --(ictx->active); - } - UNLOCK(&ictx->lock); - } - } - - ret = 0; - -out: - return ret; -} - -int32_t -jbr_lk_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct gf_flock *flock, dict_t *xdata) -{ - int32_t ret = -1; - jbr_private_t *priv = NULL; - jbr_local_t *local = NULL; - gf_boolean_t result = _gf_false; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, err); - GF_VALIDATE_OR_GOTO (this->name, flock, err); - GF_VALIDATE_OR_GOTO (this->name, xdata, err); - - /* - * Remove from queue for unlock operation only * - * For lock operation, it will be done in fan-in * - */ - if (flock->l_type == F_UNLCK) { - ret = jbr_remove_from_queue (frame, this); - if (ret) - goto err; - } - - /* - * On a follower, unwind with the op_ret and op_errno. On a * - * leader, if the fop is a locking fop, and its a failure, * - * send fail, else call stub which will dispatch the fop to * - * the followers. * - * * - * If the fop is a unlocking fop, check quorum. If quorum * - * is met, then send success. Else Rollback on leader, * - * followed by followers, and then send -ve ack to client. * - */ - if (priv->leader) { - - /* Increase the successful acks if it's a success. */ - LOCK(&frame->lock); - if (op_ret != -1) - (local->successful_acks)++; - UNLOCK(&frame->lock); - - if (flock->l_type == F_UNLCK) { - result = fop_quorum_check (this, - (double)priv->n_children, - (double)local->successful_acks); - if (result == _gf_false) { - op_ret = -1; - op_errno = EROFS; - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, - "Quorum is not met. " - "The operation has failed."); - - /* TODO: PERFORM UNLOCK ROLLBACK ON LEADER * - * FOLLOWED BY FOLLOWERS. */ - } else { - op_ret = 0; - op_errno = 0; - } - - fd_unref(local->fd); - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, - flock, xdata); - } else { - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_LOCK_FAILURE, - "The lock operation failed on " - "the leader."); - - fd_unref(local->fd); - STACK_UNWIND_STRICT (lk, frame, op_ret, - op_errno, flock, xdata); - } else { - if (!local->stub) { - goto err; - } - - call_resume(local->stub); - } - } - } else { - fd_unref(local->fd); - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, - flock, xdata); - } - - return 0; - -err: - if (local) { - if (local->stub) { - call_stub_destroy(local->stub); - } - if (local->qstub) { - call_stub_destroy(local->qstub); - } - if (local->fd) { - fd_unref(local->fd); - } - mem_put(local); - } - STACK_UNWIND_STRICT (lk, frame, -1, op_errno, - flock, xdata); - return 0; -} - -int32_t -jbr_lk_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *flock, - dict_t *xdata) -{ - uint8_t call_count = -1; - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); - - LOCK(&frame->lock); - call_count = --(local->call_count); - if (op_ret != -1) { - /* Increment the number of successful acks * - * received for the operation. * - */ - (local->successful_acks)++; - local->successful_op_ret = op_ret; - } - gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", - op_ret, op_errno, local->successful_acks); - UNLOCK(&frame->lock); - - if (call_count == 0) { - /* - * If the fop is a locking fop, then check quorum. If quorum * - * is met, send successful ack to the client. If quorum is * - * not met, then rollback locking on followers, followed by * - * rollback of locking on leader, and then sending -ve ack * - * to the client. * - * * - * If the fop is a unlocking fop, then call stub. * - */ - if (flock->l_type == F_UNLCK) { - call_resume(local->stub); - } else { - /* - * Remove from queue for locking fops, for unlocking * - * fops, it is taken care of in jbr_lk_complete * - */ - ret = jbr_remove_from_queue (frame, this); - if (ret) - goto out; - - fd_unref(local->fd); - - result = fop_quorum_check (this, - (double)priv->n_children, - (double)local->successful_acks); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, - "Didn't receive enough acks to meet " - "quorum. Failing the locking " - "operation and initiating rollback on " - "followers and the leader " - "respectively."); - - /* TODO: PERFORM ROLLBACK OF LOCKING ON - * FOLLOWERS, FOLLOWED BY ROLLBACK ON - * LEADER. - */ - - STACK_UNWIND_STRICT (lk, frame, -1, EROFS, - flock, xdata); - } else { - STACK_UNWIND_STRICT (lk, frame, 0, 0, - flock, xdata); - } - } - } - - ret = 0; -out: - return ret; -} - -/* - * Called from leader for locking fop, being writen as a separate * - * function so as to support queues. * - */ -int32_t -jbr_perform_lk_on_leader (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, flock, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - STACK_WIND (frame, jbr_lk_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, - fd, cmd, flock, xdata); - - ret = 0; -out: - return ret; -} - -int32_t -jbr_lk_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, - fd_t *fd, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) -{ - int32_t ret = -1; - jbr_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - GF_VALIDATE_OR_GOTO (this->name, flock, out); - - /* - * Check if the fop is a locking fop or unlocking fop, and - * handle it accordingly. If it is a locking fop, take the - * lock on leader first, and then send it to the followers. - * If it is a unlocking fop, unlock the followers first, - * and then on meeting quorum perform the unlock on the leader. - */ - if (flock->l_type == F_UNLCK) { - ret = jbr_lk_call_dispatch (frame, this, op_errno, - fd, cmd, flock, xdata); - if (ret) - goto out; - } else { - jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); - - if (!ictx) { - *op_errno = EIO; - goto out; - } - - LOCK(&ictx->lock); - if (ictx->active) { - gf_msg_debug (this->name, 0, - "queuing request due to conflict"); - - local->qstub = fop_lk_stub (frame, - jbr_perform_lk_on_leader, - fd, cmd, flock, xdata); - if (!local->qstub) { - UNLOCK(&ictx->lock); - goto out; - } - list_add_tail(&local->qlinks, &ictx->pqueue); - ++(ictx->pending); - UNLOCK(&ictx->lock); - ret = 0; - goto out; - } else { - list_add_tail(&local->qlinks, &ictx->aqueue); - ++(ictx->active); - } - UNLOCK(&ictx->lock); - ret = jbr_perform_lk_on_leader (frame, this, fd, cmd, - flock, xdata); - } - - ret = 0; -out: - return ret; -} - -int32_t -jbr_lk_continue (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) -{ - int32_t ret = -1; - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - GF_VALIDATE_OR_GOTO (this->name, flock, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, xdata, out); - - /* - * If it's a locking fop, then call dispatch to followers * - * If it's a unlock fop, then perform the unlock operation * - */ - if (flock->l_type == F_UNLCK) { - STACK_WIND (frame, jbr_lk_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, - fd, cmd, flock, xdata); - } else { - /* - * Directly call jbr_lk_dispatch instead of appending * - * in queue, which is done at jbr_lk_perform_local_op * - * for locking fops * - */ - ret = jbr_lk_dispatch (frame, this, fd, cmd, - flock, xdata); - if (ret) { - STACK_UNWIND_STRICT (lk, frame, -1, 0, - flock, xdata); - goto out; - } - } - - ret = 0; -out: - return ret; -} - -uint8_t -jbr_count_up_kids (jbr_private_t *priv) -{ - uint8_t retval = 0; - uint8_t i; - - for (i = 0; i < priv->n_children; ++i) { - if (priv->kid_state & (1 << i)) { - ++retval; - } - } - - return retval; -} - -/* - * The fsync machinery looks a lot like that for any write call, but there are - * some important differences that are easy to miss. First, we don't care - * about the xdata that shows whether the call came from a leader or - * reconciliation process. If we're the leader we fan out; if we're not we - * don't. Second, we don't wait for followers before we issue the local call. - * The code generation system could be updated to handle this, and still might - * if we need to implement other "almost identical" paths (e.g. for open), but - * a copy is more readable as long as it's just one. - */ - -int32_t -jbr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - jbr_local_t *local = frame->local; - gf_boolean_t unwind; - - LOCK(&frame->lock); - unwind = !--(local->call_count); - UNLOCK(&frame->lock); - - if (unwind) { - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - } - return 0; -} - -int32_t -jbr_fsync_local_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - jbr_dirty_list_t *dirty; - jbr_dirty_list_t *dtmp; - jbr_local_t *local = frame->local; - - list_for_each_entry_safe (dirty, dtmp, &local->qlinks, links) { - gf_msg_trace (this->name, 0, - "sending post-op on %p (%p)", local->fd, dirty); - GF_FREE(dirty); - } - - return jbr_fsync_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, xdata); -} - -int32_t -jbr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) -{ - jbr_private_t *priv = this->private; - jbr_local_t *local; - uint64_t ctx_int = 0LL; - jbr_fd_ctx_t *ctx_ptr; - xlator_list_t *trav; - - local = mem_get0(this->local_pool); - if (!local) { - STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, - NULL, NULL, xdata); - return 0; - } - INIT_LIST_HEAD(&local->qlinks); - frame->local = local; - - /* Move the dirty list from the fd to the fsync request. */ - LOCK(&fd->lock); - if (__fd_ctx_get(fd, this, &ctx_int) == 0) { - ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; - list_splice_init (&ctx_ptr->dirty_list, - &local->qlinks); - } - UNLOCK(&fd->lock); - - /* Issue the local call. */ - local->call_count = priv->leader ? priv->n_children : 1; - STACK_WIND (frame, jbr_fsync_local_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, - fd, flags, xdata); - - /* Issue remote calls if we're the leader. */ - if (priv->leader) { - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND (frame, jbr_fsync_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, - fd, flags, xdata); - } - } - - return 0; -} - -int32_t -jbr_getxattr_special (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - dict_t *result; - jbr_private_t *priv = this->private; - - if (!priv->leader) { - STACK_UNWIND_STRICT (getxattr, frame, -1, EREMOTE, NULL, NULL); - return 0; - } - - if (!name || (strcmp(name, JBR_REP_COUNT_XATTR) != 0)) { - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, name, xdata); - return 0; - } - - result = dict_new(); - if (!result) { - goto dn_failed; - } - - priv->up_children = jbr_count_up_kids(this->private); - if (dict_set_uint32(result, JBR_REP_COUNT_XATTR, - priv->up_children) != 0) { - goto dsu_failed; - } - - STACK_UNWIND_STRICT (getxattr, frame, 0, 0, result, NULL); - dict_unref(result); - return 0; - -dsu_failed: - dict_unref(result); -dn_failed: - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); - return 0; -} - -void -jbr_flush_fd (xlator_t *this, jbr_fd_ctx_t *fd_ctx) -{ - jbr_dirty_list_t *dirty; - jbr_dirty_list_t *dtmp; - - list_for_each_entry_safe (dirty, dtmp, &fd_ctx->dirty_list, links) { - gf_msg_trace (this->name, 0, - "sending post-op on %p (%p)", fd_ctx->fd, dirty); - GF_FREE(dirty); - } - - INIT_LIST_HEAD(&fd_ctx->dirty_list); -} - -void * -jbr_flush_thread (void *ctx) -{ - xlator_t *this = ctx; - jbr_private_t *priv = this->private; - struct list_head dirty_fds; - jbr_fd_ctx_t *fd_ctx; - jbr_fd_ctx_t *fd_tmp; - int ret; - - for (;;) { - /* - * We have to be very careful to avoid lock inversions here, so - * we can't just hold priv->dirty_lock while we take and - * release locks for each fd. Instead, we only hold dirty_lock - * at the beginning of each iteration, as we (effectively) make - * a copy of the current list head and then clear the original. - * This leads to four scenarios for adding the first entry to - * an fd and potentially putting it on the global list. - * - * (1) While we're asleep. No lock contention, it just gets - * added and will be processed on the next iteration. - * - * (2) After we've made a local copy, but before we've started - * processing that fd. The new entry will be added to the - * fd (under its lock), and we'll process it on the current - * iteration. - * - * (3) While we're processing the fd. They'll block on the fd - * lock, then see that the list is empty and put it on the - * global list. We'll process it here on the next - * iteration. - * - * (4) While we're working, but after we've processed that fd. - * Same as (1) as far as that fd is concerned. - */ - INIT_LIST_HEAD(&dirty_fds); - LOCK(&priv->dirty_lock); - list_splice_init(&priv->dirty_fds, &dirty_fds); - UNLOCK(&priv->dirty_lock); - - list_for_each_entry_safe (fd_ctx, fd_tmp, &dirty_fds, fd_list) { - ret = syncop_fsync(FIRST_CHILD(this), fd_ctx->fd, 0, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to fsync %p (%d)", - fd_ctx->fd, -ret); - } - - LOCK(&fd_ctx->fd->lock); - jbr_flush_fd(this, fd_ctx); - list_del_init(&fd_ctx->fd_list); - UNLOCK(&fd_ctx->fd->lock); - fd_unref(fd_ctx->fd); - } - - sleep(JBR_FLUSH_INTERVAL); - } - - return NULL; -} - - -int32_t -jbr_get_changelog_dir (xlator_t *this, char **cl_dir_p) -{ - xlator_t *cl_xl; - - /* Find our changelog translator. */ - cl_xl = this; - while (cl_xl) { - if (strcmp(cl_xl->type, "features/changelog") == 0) { - break; - } - cl_xl = cl_xl->children->xlator; - } - if (!cl_xl) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INIT_FAIL, - "failed to find changelog translator"); - return ENOENT; - } - - /* Find the actual changelog directory. */ - if (dict_get_str(cl_xl->options, "changelog-dir", cl_dir_p) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INIT_FAIL, - "failed to find changelog-dir for %s", cl_xl->name); - return ENODATA; - } - - return 0; -} - - -void -jbr_get_terms (call_frame_t *frame, xlator_t *this) -{ - int32_t op_errno = 0; - char *cl_dir = NULL; - int32_t term_first = -1; - int32_t term_contig = -1; - int32_t term_last = -1; - int term_num = 0; - char *probe_str = NULL; - dict_t *my_xdata = NULL; - DIR *fp = NULL; - struct dirent *entry = NULL; - struct dirent scratch[2] = {{0,},}; - - op_errno = jbr_get_changelog_dir(this, &cl_dir); - if (op_errno) { - goto err; /* Error was already logged. */ - } - op_errno = ENODATA; /* Most common error after this. */ - - fp = sys_opendir (cl_dir); - if (!fp) { - op_errno = errno; - goto err; - } - - /* Find first and last terms. */ - for (;;) { - errno = 0; - entry = sys_readdir (fp, scratch); - if (!entry || errno != 0) { - if (errno != 0) { - op_errno = errno; - goto err; - } - break; - } - - if (fnmatch("TERM.*", entry->d_name, FNM_PATHNAME) != 0) { - continue; - } - /* +5 points to the character after the period */ - term_num = atoi(entry->d_name+5); - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, - "%s => %d", entry->d_name, term_num); - if (term_num < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INVALID, - "invalid term file name %s", entry->d_name); - op_errno = EINVAL; - goto err; - } - if ((term_first < 0) || (term_first > term_num)) { - term_first = term_num; - } - if ((term_last < 0) || (term_last < term_num)) { - term_last = term_num; - } - } - if ((term_first < 0) || (term_last < 0)) { - /* TBD: are we *sure* there should always be at least one? */ - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "no terms found"); - op_errno = EINVAL; - goto err; - } - - (void) sys_closedir (fp); - fp = NULL; - - /* - * Find term_contig, which is the earliest term for which there are - * no gaps between it and term_last. - */ - for (term_contig = term_last; term_contig > 0; --term_contig) { - if (gf_asprintf(&probe_str, "%s/TERM.%d", - cl_dir, term_contig-1) <= 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, - "failed to format term %d", term_contig-1); - goto err; - } - if (sys_access(probe_str, F_OK) != 0) { - GF_FREE(probe_str); - break; - } - GF_FREE(probe_str); - } - - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, - "found terms %d-%d (%d)", - term_first, term_last, term_contig); - - /* Return what we've found */ - my_xdata = dict_new(); - if (!my_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, - "failed to allocate reply dictionary"); - goto err; - } - if (dict_set_int32(my_xdata, "term-first", term_first) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-first"); - goto err; - } - if (dict_set_int32(my_xdata, "term-contig", term_contig) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-contig"); - goto err; - } - if (dict_set_int32(my_xdata, "term-last", term_last) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-last"); - goto err; - } - - /* Finally! */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata); - dict_unref(my_xdata); - return; - -err: - if (fp) { - (void) sys_closedir (fp); - } - if (my_xdata) { - dict_unref(my_xdata); - } - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); -} - - -long -get_entry_count (xlator_t *this, int fd) -{ - struct stat buf; - long min; /* last entry not known to be empty */ - long max; /* first entry known to be empty */ - long curr; - char entry[CHANGELOG_ENTRY_SIZE]; - - if (sys_fstat (fd, &buf) < 0) { - return -1; - } - - min = 0; - max = buf.st_size / CHANGELOG_ENTRY_SIZE; - - while ((min+1) < max) { - curr = (min + max) / 2; - if (sys_lseek(fd, curr*CHANGELOG_ENTRY_SIZE, SEEK_SET) < 0) { - return -1; - } - if (sys_read(fd, entry, sizeof(entry)) != sizeof(entry)) { - return -1; - } - if ((entry[0] == '_') && (entry[1] == 'P')) { - min = curr; - } else { - max = curr; - } - } - - if (sys_lseek(fd, 0, SEEK_SET) < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to reset offset"); - } - return max; -} - - -void -jbr_open_term (call_frame_t *frame, xlator_t *this, dict_t *xdata) -{ - int32_t op_errno; - char *cl_dir; - char *term; - char *path = NULL; - jbr_private_t *priv = this->private; - - op_errno = jbr_get_changelog_dir(this, &cl_dir); - if (op_errno) { - goto err; - } - - if (dict_get_str(xdata, "term", &term) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "missing term"); - op_errno = ENODATA; - goto err; - } - - if (gf_asprintf(&path, "%s/TERM.%s", cl_dir, term) < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, "failed to construct path"); - op_errno = ENOMEM; - goto err; - } - - if (priv->term_fd >= 0) { - sys_close (priv->term_fd); - } - priv->term_fd = open(path, O_RDONLY); - if (priv->term_fd < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to open term file"); - goto err; - } - - priv->term_total = get_entry_count(this, priv->term_fd); - if (priv->term_total < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "failed to get entry count"); - sys_close (priv->term_fd); - priv->term_fd = -1; - op_errno = EIO; - goto err; - } - priv->term_read = 0; - - /* Success! */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - GF_FREE (path); - return; - -err: - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); - GF_FREE (path); -} - - -void -jbr_next_entry (call_frame_t *frame, xlator_t *this) -{ - int32_t op_errno = ENOMEM; - jbr_private_t *priv = this->private; - ssize_t nbytes; - dict_t *my_xdata; - - if (priv->term_fd < 0) { - op_errno = EBADFD; - goto err; - } - - if (priv->term_read >= priv->term_total) { - op_errno = ENODATA; - goto err; - } - - nbytes = sys_read (priv->term_fd, priv->term_buf, CHANGELOG_ENTRY_SIZE); - if (nbytes < CHANGELOG_ENTRY_SIZE) { - if (nbytes < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "error reading next entry: %s", - strerror(errno)); - } else { - op_errno = EIO; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "got %zd/%d bytes for next entry", - nbytes, CHANGELOG_ENTRY_SIZE); - } - goto err; - } - ++(priv->term_read); - - my_xdata = dict_new(); - if (!my_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, "failed to allocate reply xdata"); - goto err; - } - - if (dict_set_static_bin(my_xdata, "data", - priv->term_buf, CHANGELOG_ENTRY_SIZE) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to assign reply xdata"); - goto err; - } - - STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata); - dict_unref(my_xdata); - return; - -err: - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); -} - -int32_t -jbr_ipc_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - jbr_local_t *local = NULL; - int32_t ret = -1; - uint8_t call_count; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); - - LOCK(&frame->lock); - call_count = --(local->call_count); - UNLOCK(&frame->lock); - - if (call_count == 0) { -#if defined(JBR_CG_QUEUE) - ret = jbr_remove_from_queue (frame, this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_GENERIC, "Failed to remove from queue."); - } -#endif - /* - * Unrefing the reference taken in continue() or complete() * - */ - dict_unref (local->xdata); - STACK_DESTROY (frame->root); - } - - ret = 0; -out: - return ret; -} - -int32_t -jbr_ipc_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - dict_t *xdata) -{ - jbr_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - jbr_ipc_call_dispatch (frame, - this, &op_errno, - FDL_IPC_JBR_SERVER_ROLLBACK, - local->xdata); -out: - return 0; -} - -int32_t -jbr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - switch (op) { - case JBR_SERVER_TERM_RANGE: - jbr_get_terms(frame, this); - break; - case JBR_SERVER_OPEN_TERM: - jbr_open_term(frame, this, xdata); - break; - case JBR_SERVER_NEXT_ENTRY: - jbr_next_entry(frame, this); - break; - case FDL_IPC_JBR_SERVER_ROLLBACK: - /* - * Just send the fop down to fdl. Need not * - * dispatch it to other bricks in the sub- * - * volume, as it will be done where the op * - * has failed. * - */ - default: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - op, xdata); - } - - return 0; -} - -#pragma generate - -int32_t -jbr_forget (xlator_t *this, inode_t *inode) -{ - uint64_t ctx = 0LL; - - if ((inode_ctx_del(inode, this, &ctx) == 0) && ctx) { - GF_FREE((void *)(long)ctx); - } - - return 0; -} - -int32_t -jbr_release (xlator_t *this, fd_t *fd) -{ - uint64_t ctx = 0LL; - - if ((fd_ctx_del(fd, this, &ctx) == 0) && ctx) { - GF_FREE((void *)(long)ctx); - } - - return 0; -} - -struct xlator_cbks cbks = { - .forget = jbr_forget, - .release = jbr_release, -}; - -int -jbr_reconfigure (xlator_t *this, dict_t *options) -{ - jbr_private_t *priv = this->private; - - GF_OPTION_RECONF ("leader", - priv->config_leader, options, bool, err); - GF_OPTION_RECONF ("quorum-percent", - priv->quorum_pct, options, percent, err); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "reconfigure called, config_leader = %d, quorum_pct = %.1f\n", - priv->leader, priv->quorum_pct); - - priv->leader = priv->config_leader; - - return 0; - -err: - return -1; -} - -int -jbr_get_child_index (xlator_t *this, xlator_t *kid) -{ - xlator_list_t *trav; - int retval = -1; - - for (trav = this->children; trav; trav = trav->next) { - ++retval; - if (trav->xlator == kid) { - return retval; - } - } - - return -1; -} - -/* - * Child notify handling is unreasonably FUBAR. Sometimes we'll get a - * CHILD_DOWN for a protocol/client child before we ever got a CHILD_UP for it. - * Other times we won't. Because it's effectively random (probably racy), we - * can't just maintain a count. We actually have to keep track of the state - * for each child separately, to filter out the bogus CHILD_DOWN events, and - * then generate counts on demand. - */ -int -jbr_notify (xlator_t *this, int event, void *data, ...) -{ - jbr_private_t *priv = this->private; - int index = -1; - int ret = -1; - gf_boolean_t result = _gf_false; - gf_boolean_t relevant = _gf_false; - - switch (event) { - case GF_EVENT_CHILD_UP: - index = jbr_get_child_index(this, data); - if (index >= 0) { - /* Check if the child was previously down - * and it's not a false CHILD_UP - */ - if (!(priv->kid_state & (1 << index))) { - relevant = _gf_true; - } - - priv->kid_state |= (1 << index); - priv->up_children = jbr_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_UP for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - if (!priv->config_leader && (priv->up_children > 1)) { - priv->leader = _gf_false; - } - - /* If it's not relevant, or we have already * - * sent CHILD_UP just break */ - if (!relevant || priv->child_up) - break; - - /* If it's not a leader, just send the notify up */ - if (!priv->leader) { - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_true; - break; - } - - result = fop_quorum_check (this, - (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Not enough children " - "are up to meet quorum. Waiting to " - "send CHILD_UP from leader"); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Enough children are up " - "to meet quorum. Sending CHILD_UP " - "from leader"); - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_true; - } - } - break; - case GF_EVENT_CHILD_DOWN: - index = jbr_get_child_index(this, data); - if (index >= 0) { - /* Check if the child was previously up - * and it's not a false CHILD_DOWN - */ - if (priv->kid_state & (1 << index)) { - relevant = _gf_true; - } - priv->kid_state &= ~(1 << index); - priv->up_children = jbr_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_DOWN for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - if (!priv->config_leader && (priv->up_children < 2) - && relevant) { - priv->leader = _gf_true; - } - - /* If it's not relevant, or we have already * - * sent CHILD_DOWN just break */ - if (!relevant || !priv->child_up) - break; - - /* If it's not a leader, just break coz we shouldn't * - * propagate the failure from the failure till it * - * itself goes down * - */ - if (!priv->leader) { - break; - } - - result = fop_quorum_check (this, - (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Enough children are " - "to down to fail quorum. " - "Sending CHILD_DOWN from leader"); - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_false; - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Not enough children " - "are down to fail quorum. Waiting to " - "send CHILD_DOWN from leader"); - } - } - break; - default: - ret = default_notify(this, event, data); - } - - return ret; -} - - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - - ret = xlator_mem_acct_init (this, gf_mt_jbr_end + 1); - - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "Memory accounting init" "failed"); - return ret; - } -out: - return ret; -} - - -void -jbr_deallocate_priv (jbr_private_t *priv) -{ - if (!priv) { - return; - } - - GF_FREE(priv); -} - - -int32_t -jbr_init (xlator_t *this) -{ - xlator_list_t *remote; - xlator_list_t *local; - jbr_private_t *priv = NULL; - xlator_list_t *trav; - pthread_t kid; - extern xlator_t global_xlator; - glusterfs_ctx_t *oldctx = global_xlator.ctx; - - /* - * Any fop that gets special treatment has to be patched in here, - * because the compiled-in table is produced by the code generator and - * only contains generated functions. Note that we have to go through - * this->fops because of some dynamic-linking strangeness; modifying - * the static table doesn't work. - */ - this->fops->getxattr = jbr_getxattr_special; - this->fops->fsync = jbr_fsync; - - local = this->children; - if (!local) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, - "no local subvolume"); - goto err; - } - - remote = local->next; - if (!remote) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, - "no remote subvolumes"); - goto err; - } - - this->local_pool = mem_pool_new (jbr_local_t, 128); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "failed to create jbr_local_t pool"); - goto err; - } - - priv = GF_CALLOC (1, sizeof(*priv), gf_mt_jbr_private_t); - if (!priv) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "could not allocate priv"); - goto err; - } - - for (trav = this->children; trav; trav = trav->next) { - ++(priv->n_children); - } - - LOCK_INIT(&priv->dirty_lock); - LOCK_INIT(&priv->index_lock); - INIT_LIST_HEAD(&priv->dirty_fds); - priv->term_fd = -1; - - this->private = priv; - - GF_OPTION_INIT ("leader", priv->config_leader, bool, err); - GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err); - - priv->leader = priv->config_leader; - priv->child_up = _gf_false; - - if (gf_thread_create (&kid, NULL, jbr_flush_thread, this, - "jbrflush") != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE, - "could not start flush thread"); - /* TBD: treat this as a fatal error? */ - } - - /* - * Calling glfs_new changes old->ctx, even if THIS still points - * to global_xlator. That causes problems later in the main - * thread, when gf_log_dump_graph tries to use the FILE after - * we've mucked with it and gets a segfault in __fprintf_chk. - * We can avoid all that by undoing the damage before we - * continue. - */ - global_xlator.ctx = oldctx; - - return 0; - -err: - jbr_deallocate_priv(priv); - return -1; -} - - -void -jbr_fini (xlator_t *this) -{ - jbr_deallocate_priv(this->private); -} - -class_methods_t class_methods = { - .init = jbr_init, - .fini = jbr_fini, - .reconfigure = jbr_reconfigure, - .notify = jbr_notify, -}; - -struct volume_options options[] = { - { .key = {"leader"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "false", - .description = "Start in the leader role. This is only for " - "bootstrapping the code, and should go away when we " - "have real leader election." - }, - { .key = {"vol-name"}, - .type = GF_OPTION_TYPE_STR, - .description = "volume name" - }, - { .key = {"my-name"}, - .type = GF_OPTION_TYPE_STR, - .description = "brick name in form of host:/path" - }, - { .key = {"etcd-servers"}, - .type = GF_OPTION_TYPE_STR, - .description = "list of comma separated etc servers" - }, - { .key = {"subvol-uuid"}, - .type = GF_OPTION_TYPE_STR, - .description = "UUID for this JBR (sub)volume" - }, - { .key = {"quorum-percent"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "50.0", - .description = "percentage of rep_count-1 that must be up" - }, - { .key = {NULL} }, -}; diff --git a/xlators/experimental/posix2/Makefile.am b/xlators/experimental/posix2/Makefile.am deleted file mode 100644 index 74e5ab0f5bc..00000000000 --- a/xlators/experimental/posix2/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = common mds ds - -CLEANFILES = diff --git a/xlators/experimental/posix2/README.md b/xlators/experimental/posix2/README.md deleted file mode 100644 index 955a98d061e..00000000000 --- a/xlators/experimental/posix2/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# POSIX2 Experimental README - -POSIX2 is an implementation of modified storage translator to cater to DHT2 -on disk needs. - -For further understanding, refer to xlators/experimental/dht2/README.md for -details regarding POSIX2 diff --git a/xlators/experimental/posix2/TODO.md b/xlators/experimental/posix2/TODO.md deleted file mode 100644 index 20cd1e89c1d..00000000000 --- a/xlators/experimental/posix2/TODO.md +++ /dev/null @@ -1,3 +0,0 @@ -# POSIX2 TODO List - -<Items will be added as code is pulled into the repository>
\ No newline at end of file diff --git a/xlators/experimental/posix2/common/Makefile.am b/xlators/experimental/posix2/common/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/posix2/common/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/posix2/common/src/Makefile.am b/xlators/experimental/posix2/common/src/Makefile.am deleted file mode 100644 index 07533d2bf37..00000000000 --- a/xlators/experimental/posix2/common/src/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -lib_LTLIBRARIES = libposix2common.la - -posix2_common_sources = posix2-common.c - -libposix2common_la_SOURCES = $(posix2_common_sources) -libposix2common_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -libposix2common_la_CFLAGS = -Wall $(GF_CFLAGS) - -libposix2common_la_CPPFLAGS = $(GF_CPPFLAGS) -libposix2common_la_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src -libposix2common_la_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src -libposix2common_la_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src - - -CLEANFILES = diff --git a/xlators/experimental/posix2/common/src/posix2-common.c b/xlators/experimental/posix2/common/src/posix2-common.c deleted file mode 100644 index 14b51d538b2..00000000000 --- a/xlators/experimental/posix2/common/src/posix2-common.c +++ /dev/null @@ -1,18 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/* File: posix2-common.c - * This file contains common routines across ds and mds posix xlators - * The entire functionality including comments is TODO. - */ - -#include "glusterfs.h" -#include "logging.h" -#include "statedump.h" diff --git a/xlators/experimental/posix2/ds/Makefile.am b/xlators/experimental/posix2/ds/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/posix2/ds/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/posix2/ds/src/Makefile.am b/xlators/experimental/posix2/ds/src/Makefile.am deleted file mode 100644 index d77ef8cb540..00000000000 --- a/xlators/experimental/posix2/ds/src/Makefile.am +++ /dev/null @@ -1,22 +0,0 @@ -if WITH_SERVER -xlator_LTLIBRARIES = posix2-ds.la -endif -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental - -posix2_ds_sources = posix2-ds-main.c - -posix2_ds_la_SOURCES = $(posix2_ds_sources) -posix2_ds_la_LDFLAGS = -module -avoid-version -posix2_ds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -posix2_ds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -AM_CPPFLAGS = $(GF_CPPFLAGS) -AM_CPPFLAGS += -I$(top_srcdir)/xlators/storage/posix2/common/src -AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src -AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src -AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src -AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src - -CLEANFILES = diff --git a/xlators/experimental/posix2/ds/src/posix2-ds-main.c b/xlators/experimental/posix2/ds/src/posix2-ds-main.c deleted file mode 100644 index 675c4d7c9da..00000000000 --- a/xlators/experimental/posix2/ds/src/posix2-ds-main.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/* File: posix2-ds-main.c - * This file contains the xlator loading functions, FOP entry points - * and options. - * The entire functionality including comments is TODO. - */ - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "statedump.h" - -int32_t -posix2_ds_init (xlator_t *this) -{ - if (this->children) { - gf_log (this->name, GF_LOG_ERROR, - "This (%s) is a leaf xlator, but found children", - this->name); - return -1; - } - - return 0; -} - -void -posix2_ds_fini (xlator_t *this) -{ - return; -} - -class_methods_t class_methods = { - .init = posix2_ds_init, - .fini = posix2_ds_fini, -}; - -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; - -/* -struct xlator_dumpops dumpops = { -}; -*/ - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/experimental/posix2/mds/Makefile.am b/xlators/experimental/posix2/mds/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/posix2/mds/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/posix2/mds/src/Makefile.am b/xlators/experimental/posix2/mds/src/Makefile.am deleted file mode 100644 index c6411f46114..00000000000 --- a/xlators/experimental/posix2/mds/src/Makefile.am +++ /dev/null @@ -1,22 +0,0 @@ -if WITH_SERVER -xlator_LTLIBRARIES = posix2-mds.la -endif -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental - -posix2_mds_sources = posix2-mds-main.c - -posix2_mds_la_SOURCES = $(posix2_mds_sources) -posix2_mds_la_LDFLAGS = -module -avoid-version -posix2_mds_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -posix2_mds_la_LIBADD += $(top_builddir)/xlators/experimental/posix2/common/src/libposix2common.la - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -AM_CPPFLAGS = $(GF_CPPFLAGS) -AM_CPPFLAGS += -I$(top_srcdir)/xlators/storage/posix2/common/src -AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src -AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src -AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src -AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src - -CLEANFILES = diff --git a/xlators/experimental/posix2/mds/src/posix2-mds-main.c b/xlators/experimental/posix2/mds/src/posix2-mds-main.c deleted file mode 100644 index 71ff4e0089c..00000000000 --- a/xlators/experimental/posix2/mds/src/posix2-mds-main.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -/* File: posix2-mds-main.c - * This file contains the xlator loading functions, FOP entry points - * and options. - * The entire functionality including comments is TODO. - */ - -#include "glusterfs.h" -#include "xlator.h" -#include "logging.h" -#include "statedump.h" - -int32_t -posix2_mds_init (xlator_t *this) -{ - if (this->children) { - gf_log (this->name, GF_LOG_ERROR, - "This (%s) is a leaf xlator, but found children", - this->name); - return -1; - } - - return 0; -} - -void -posix2_mds_fini (xlator_t *this) -{ - return; -} - -class_methods_t class_methods = { - .init = posix2_mds_init, - .fini = posix2_mds_fini, -}; - -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; - -/* -struct xlator_dumpops dumpops = { -}; -*/ - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 0f4df92f07d..86b1a39aaa1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3525,28 +3525,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_7_6, .flags = VOLOPT_FLAG_CLIENT_OPT }, - { .key = "cluster.jbr", - .voltype = "experimental/jbr", - .option = "!jbr", - .op_version = GD_OP_VERSION_4_0_0, - .description = "enable JBR instead of AFR for replication", - .flags = VOLOPT_FLAG_CLIENT_OPT | VOLOPT_FLAG_XLATOR_OPT - }, - { .key = "cluster.jbr.quorum-percent", - .voltype = "experimental/jbr", - .option = "quorum-percent", - .op_version = GD_OP_VERSION_4_0_0, - .description = "percent of rep_count-1 bricks that must be up" - }, - /* Full Data Logging */ - { - .key = "features.fdl", - .voltype = "features/fdl", - .option = "!fdl", - .op_version = GD_OP_VERSION_4_0_0, - .flags = VOLOPT_FLAG_XLATOR_OPT, - .type = NO_DOC, - }, { .key = "cluster.shd-max-threads", .voltype = "cluster/replicate", .op_version = GD_OP_VERSION_3_7_12, |