diff options
39 files changed, 2 insertions, 4830 deletions
diff --git a/.gitignore b/.gitignore index e5c699beaf8..7a3a2ba3a5e 100644 --- a/.gitignore +++ b/.gitignore @@ -95,11 +95,3 @@ extras/peer_add_secret_pub tools/gfind_missing_files/gcrawler tools/glusterfind/glusterfind tools/glusterfind/src/tool.conf -# Generated by fdl xlator -xlators/experimental/fdl/src/fdl.c -xlators/experimental/fdl/src/gf_logdump -xlators/experimental/fdl/src/gf_recon -xlators/experimental/fdl/src/libfdl.c -xlators/experimental/fdl/src/librecon.c -xlators/experimental/jbr-client/src/jbrc-cg.c -xlators/experimental/jbr-server/src/jbr-cg.c diff --git a/configure.ac b/configure.ac index 5e3b68b6247..5caff8a0f4c 100644 --- a/configure.ac +++ b/configure.ac @@ -119,8 +119,6 @@ AC_CONFIG_FILES([Makefile xlators/features/Makefile xlators/features/arbiter/Makefile xlators/features/arbiter/src/Makefile - xlators/experimental/fdl/Makefile - xlators/experimental/fdl/src/Makefile xlators/features/changelog/Makefile xlators/features/changelog/src/Makefile xlators/features/changelog/lib/Makefile @@ -191,10 +189,6 @@ AC_CONFIG_FILES([Makefile xlators/mgmt/glusterd/Makefile xlators/mgmt/glusterd/src/Makefile xlators/experimental/Makefile - xlators/experimental/jbr-client/Makefile - xlators/experimental/jbr-client/src/Makefile - xlators/experimental/jbr-server/Makefile - xlators/experimental/jbr-server/src/Makefile cli/Makefile cli/src/Makefile doc/Makefile diff --git a/glusterfs.spec.in b/glusterfs.spec.in index fb65abcc3e8..39b6cb01c71 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -910,7 +910,6 @@ exit 0 %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so -%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/fdl.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so @@ -1088,8 +1087,6 @@ exit 0 %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so -%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/jbrc.so -%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/jbr.so %if ( 0%{!?_without_tiering:1} ) %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changetimerecorder.so %endif @@ -1186,9 +1183,6 @@ exit 0 /usr/lib/firewalld/services/glusterfs.xml %endif -%{_sbindir}/gf_logdump -%{_sbindir}/gf_recon - %changelog * Fri May 6 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com> - additional dirs and files in /var/lib/glusterd/... (#1333900) diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 3324d0b2aeb..8de913be199 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -1226,11 +1226,6 @@ should_call_fini (glusterfs_ctx_t *ctx, xlator_t *trav) return _gf_true; } - /* This is the only one known to be safe in glusterfsd. */ - if (!strcmp(trav->type,"experimental/fdl")) { - return _gf_true; - } - return _gf_false; } diff --git a/tests/basic/ec/ec.t b/tests/basic/ec/ec.t index a6ae287b7a7..6722c433a15 100644 --- a/tests/basic/ec/ec.t +++ b/tests/basic/ec/ec.t @@ -11,12 +11,6 @@ function my_getfattr { getfattr --only-values -e text $* 2> /dev/null } -function get_rep_count { - v=$(my_getfattr -n trusted.jbr.rep-count $1) - #echo $v > /dev/tty - echo $v -} - function create_file { dd if=/dev/urandom of=$1 bs=4k count=$2 conv=sync 2> /dev/null } diff --git a/tests/basic/jbr/jbr-volgen.t b/tests/basic/jbr/jbr-volgen.t deleted file mode 100644 index fcd20e5f998..00000000000 --- a/tests/basic/jbr/jbr-volgen.t +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc - -volfiles=${GLUSTERD_WORKDIR}/vols/${V0}/ -check_brick_volfiles () { - for vf in ${volfiles}${V0}.$(hostname).*.vol; do - grep -qs experimental/jbr $vf || return - # At least for now, nothing else would put a client translator - # in a brick volfile. - grep -qs protocol/client $vf || return - done - echo "OK" -} - -TEST glusterd -TEST pidof glusterd - -TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2} -TEST $CLI volume set $V0 cluster.jbr on - -# Check that the client volfile got modified properly. -TEST grep -qs experimental/jbrc ${volfiles}${V0}.tcp-fuse.vol - -# Check that the brick volfiles got modified as well. -EXPECT "OK" check_brick_volfiles - -# Put things back and make sure the "undo" worked. -TEST $CLI volume set $V0 cluster.jbr off -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 -echo hello > $M0/probe -EXPECT hello cat ${B0}/${V0}1/probe -EXPECT hello cat ${B0}/${V0}2/probe - -cleanup diff --git a/tests/basic/jbr/jbr.t b/tests/basic/jbr/jbr.t deleted file mode 100755 index 283446c9635..00000000000 --- a/tests/basic/jbr/jbr.t +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../../include.rc -. $(dirname $0)/../../volume.rc -. $(dirname $0)/../../cluster.rc -. $(dirname $0)/../../snapshot.rc - -cleanup; - -TEST verify_lvm_version; -#Create cluster with 3 nodes -TEST launch_cluster 3; -TEST setup_lvm 3 - -TEST $CLI_1 peer probe $H2; -TEST $CLI_1 peer probe $H3; -EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count; - -TEST $CLI_1 volume create $V0 replica 3 $H1:$L1 $H2:$L2 $H3:$L3 -TEST $CLI_1 volume set $V0 cluster.jbr on -#TEST $CLI_1 volume set $V0 diagnostics.brick-log-level DEBUG -TEST $CLI_1 volume start $V0 - -TEST glusterfs --volfile-id=$V0 --volfile-server=$H1 --entry-timeout=0 $M0; - -EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" jbrc_child_up_status $V0 0 - -echo "file" > $M0/file1 -TEST stat $L1/file1 -TEST stat $L2/file1 -TEST stat $L3/file1 - -cleanup; diff --git a/tests/features/fdl-overflow.t b/tests/features/fdl-overflow.t deleted file mode 100644 index d7633a7ca7d..00000000000 --- a/tests/features/fdl-overflow.t +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc - -log_base=$($CLI --print-logdir) -log_id=${B0}/${V0}-0 -log_id=${log_id:1} # Remove initial slash -log_id=${log_id//\//-} # Replace remaining slashes with dashes - -_check_sizes () { - local n=0 - local sz - local total_sz=0 - - # We don't care about the sizes of the meta files. That would be - # embedding too much of the implementation into the test. - n=$(ls ${log_base}/${log_id}-meta-*.jnl | wc -l) - [ $n = 2 ] || return 1 - - # We *do* care about the sizes of the data files, which should exactly - # reflect the amount of data written via dd. - n=0 - while read sz name; do - G_LOG "found journal ${name} size ${sz}MB" - n=$((n+1)) - total_sz=$((total_sz+sz)) - done < <(du -sm ${log_base}/${log_id}-data-*.jnl) - [ $n = 2 ] || return 1 - # On our CentOS and NetBSD regression-test systems, but not on my Fedora - # development system, each file ends up being slightly larger than its - # data size because of metadata, and 'du' rounds that up to a full extra - # megabyte. We'll allow either result, because what we're really - # looking for is a complete failure to roll over from one file to - # another at the appropriate size. - [ $total_sz = 20 -o $total_sz = $((n+20)) ] || return 1 - - return 0 -} - -check_sizes () { - set -x - _check_sizes - ret=$? - set +x - return ret -} - -if [ x"$OSTYPE" = x"NetBSD" ]; then - CREAT_OFLAG="creat," -else - CREAT_OFLAG="" -fi - -TEST rm -f ${log_base}/${log_id}-*.log -TEST glusterd -TEST pidof glusterd - -# Get a simple volume set up and mounted with FDL active. -TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 -TEST $CLI volume set $V0 changelog.changelog off -TEST $CLI volume set $V0 features.fdl on -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 - -# Generate some I/O and unmount/stop so we can see log sizes. -TEST dd if=/dev/zero of=$M0/twentyMB bs=1048576 count=20 \ - oflag=${CREAT_OFLAG}sync -TEST umount $M0 -TEST $CLI volume stop $V0 - -TEST _check_sizes - -cleanup diff --git a/tests/features/fdl.t b/tests/features/fdl.t deleted file mode 100644 index 34d6d78228a..00000000000 --- a/tests/features/fdl.t +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc - -log_base=$($CLI --print-logdir) -log_id=${B0}/${V0}-0 -log_id=${log_id:1} # Remove initial slash -log_id=${log_id//\//-} # Replace remaining slashes with dashes -FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl -FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl - -check_logfile() { - [ $(gf_logdump $FDL_META_FILE $FDL_DATA_FILE | grep $1 | wc -l) -ge $2 ] -} - -if [ x"$OSTYPE" = x"NetBSD" ]; then - CREAT_OFLAG="creat," -else - CREAT_OFLAG="" -fi - -TEST rm -f $FDL_META_FILE $FDL_DATA_FILE -TEST glusterd -TEST pidof glusterd - -# Get a simple volume set up and mounted with FDL active. -TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 -TEST $CLI volume set $V0 changelog.changelog off -TEST $CLI volume set $V0 features.fdl on -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 - -# Generate some I/O and unmount. -TEST mkdir -p $M0/abc/def -TEST dd if=/dev/zero of=$M0/abc/def/ghi bs=128 count=2 \ - oflag=${CREAT_OFLAG}sync -TEST chmod 314 $M0/abc/def/ghi -TEST rm -rf $M0/abc -TEST umount $M0 - -# Check that gf_logdump works, and shows the ops we just issued. There will be -# more SETATTR ops than the one corresponding to our chmod, because some are -# issued internally. We have to guess a bit about where the log will be. -TEST check_logfile GF_FOP_MKDIR 2 -TEST check_logfile GF_FOP_CREATE 1 -TEST check_logfile GF_FOP_WRITE 2 -TEST check_logfile GF_FOP_SETATTR 1 -TEST check_logfile GF_FOP_UNLINK 1 -TEST check_logfile GF_FOP_RMDIR 2 - -cleanup diff --git a/tests/features/recon.t b/tests/features/recon.t deleted file mode 100644 index 9989f243380..00000000000 --- a/tests/features/recon.t +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -. $(dirname $0)/../traps.rc -. $(dirname $0)/../include.rc -. $(dirname $0)/../volume.rc - -log_base=$($CLI --print-logdir) -log_id=${B0}/${V0}-0 -log_id=${log_id:1} # Remove initial slash -log_id=${log_id//\//-} # Replace remaining slashes with dashes -FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl -FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl - -tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX) -push_trapfunc "rm -rf $tmpdir" - -write_file () { - echo "peekaboo" > $1 -} - -TEST rm -f $FDL_META_FILE $FDL_DATA_FILE -TEST glusterd -TEST pidof glusterd - -# Get a simple volume set up and mounted with FDL active. -TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 -TEST $CLI volume set $V0 features.fdl on -TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id $V0 $M0 - -# Generate some I/O and then copy off the journal files for later. -TEST mkdir -p $M0/abc/def -TEST write_file $M0/abc/def/ghi -#EST chmod 314 $M0/abc/def/ghi -cp ${FDL_META_FILE} ${FDL_DATA_FILE} ${tmpdir} - -# Get back to an empty state and unmount. -TEST rm -rf $M0/abc -TEST umount $M0 - -# Make sure we really are in an empty state. Otherwise the tests below could -# pass just because we never cleaned up in the first place. -TEST [ ! -d ${B0}/${V0}-0/abc ] - -# Create a stub volfile. -vol_file=${GLUSTERD_WORKDIR}/vols/${V0}/${V0}.${H0}.${log_id}.vol -vol_id_line=$(grep volume-id ${vol_file}) -cat > ${tmpdir}/recon.vol << EOF -volume recon-posix - type storage/posix - option directory ${B0}/${V0}-0 -${vol_id_line} -end-volume -EOF - -TEST gf_recon ${tmpdir}/recon.vol ${tmpdir}/$(basename ${FDL_META_FILE}) \ - ${tmpdir}/$(basename ${FDL_DATA_FILE}) - -TEST [ -d ${B0}/${V0}-0/abc/def ] -EXPECT "peekaboo" cat ${B0}/${V0}-0/abc/def/ghi -# TBD: test permissions, xattrs - -cleanup diff --git a/tests/volume.rc b/tests/volume.rc index f46f8a19e62..e488aa73b1c 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -110,24 +110,6 @@ function snap_client_connected_status { echo "$up" } -function _jbrc_child_up_status { - local vol=$1 - #brick_id is (brick-num in volume info - 1) - local brick_id=$2 - local gen_state_dump=$3 - local fpath=$($gen_state_dump $vol) - up=$(grep -a -B1 child_$brick_id=$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=') - rm -f $fpath - echo "$up" -} - -function jbrc_child_up_status { - local vol=$1 - #brick_id is (brick-num in volume info - 1) - local brick_id=$2 - _jbrc_child_up_status $vol $brick_id generate_mount_statedump -} - function _afr_child_up_status { local vol=$1 #brick_id is (brick-num in volume info - 1) diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am index f9a58914cc9..1b9f3d5746f 100644 --- a/xlators/experimental/Makefile.am +++ b/xlators/experimental/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = jbr-client jbr-server fdl +SUBDIRS = CLEANFILES = diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/experimental/fdl/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/fdl/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am deleted file mode 100644 index aed0204284f..00000000000 --- a/xlators/experimental/fdl/src/Makefile.am +++ /dev/null @@ -1,43 +0,0 @@ -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental -xlator_LTLIBRARIES = fdl.la - -noinst_HEADERS = jnl-types.h - -nodist_fdl_la_SOURCES = fdl.c -fdl_la_LDFLAGS = -module -avoid-version -fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -sbin_PROGRAMS = gf_logdump gf_recon -gf_logdump_SOURCES = logdump.c -nodist_gf_logdump_SOURCES = libfdl.c -gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ - $(top_builddir)/api/src/libgfapi.la - -# Eventually recon(ciliation) code will move elsewhere, but for now it's -# easier to have it next to the similar logdump code. -gf_recon_SOURCES = recon.c -nodist_gf_recon_SOURCES = librecon.c -gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ - $(top_builddir)/api/src/libgfapi.la - -AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/api/src -fPIC \ - -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ - -DDATADIR=\"$(localstatedir)\" - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py -EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c - -CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \ - $(nodist_gf_recon_SOURCES) - -fdl.c: fdl-tmpl.c gen_fdl.py - $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@ - -libfdl.c: dump-tmpl.c gen_dumper.py - $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@ - -librecon.c: recon-tmpl.c gen_recon.py - $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@ diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c deleted file mode 100644 index cac1071a9c1..00000000000 --- a/xlators/experimental/fdl/src/dump-tmpl.c +++ /dev/null @@ -1,156 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glfs.h" -#include "iatt.h" -#include "xlator.h" -#include "jnl-types.h" - -#pragma fragment DICT - { - int key_len, data_len; - char *key_ptr; - printf ("@ARGNAME@ = dict {\n"); - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - new_meta += sizeof(int) + data_len; - printf (" %s = <%d bytes>\n", key_ptr, data_len); - } - printf ("}\n"); - } - -#pragma fragment DOUBLE - printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), - *((uint64_t *)new_meta)); - new_meta += sizeof(uint64_t); - -#pragma fragment GFID - printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - -#pragma fragment INTEGER - printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), - *((uint32_t *)new_meta)); - new_meta += sizeof(uint32_t); - -#pragma fragment LOC - printf ("@ARGNAME@ = loc {\n"); - printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - if (*(new_meta++)) { - printf (" name = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } - printf ("}\n"); - -#pragma fragment STRING - if (*(new_meta++)) { - printf ("@ARGNAME@ = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } - -#pragma fragment VECTOR - { - size_t len = *((size_t *)new_meta); - new_meta += sizeof(len); - printf ("@ARGNAME@ = <%zu bytes>\n", len); - new_data += len; - } - -#pragma fragment IATT - { - ia_prot_t *myprot = ((ia_prot_t *)new_meta); - printf ("@ARGNAME@ = iatt {\n"); - printf (" ia_prot = %c%c%c", - myprot->suid ? 'S' : '-', - myprot->sgid ? 'S' : '-', - myprot->sticky ? 'T' : '-'); - printf ("%c%c%c", - myprot->owner.read ? 'r' : '-', - myprot->owner.write ? 'w' : '-', - myprot->owner.exec ? 'x' : '-'); - printf ("%c%c%c", - myprot->group.read ? 'r' : '-', - myprot->group.write ? 'w' : '-', - myprot->group.exec ? 'x' : '-'); - printf ("%c%c%c\n", - myprot->other.read ? 'r' : '-', - myprot->other.write ? 'w' : '-', - myprot->other.exec ? 'x' : '-'); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - printf (" ia_uid = %u\n", myints[0]); - printf (" ia_gid = %u\n", myints[1]); - printf (" ia_atime = %u.%09u\n", myints[2], myints[3]); - printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]); - new_meta += sizeof(*myints) * 6; - } - -#pragma fragment FOP -void -fdl_dump_@NAME@ (char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - - /* TBD: word size/endianness */ -@FUNCTION_BODY@ - - *old_meta = new_meta; - *old_data = new_data; -} - -#pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - fdl_dump_@NAME@ (&new_meta, &new_data); - break; - -#pragma fragment EPILOG -int -fdl_dump (char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - static glfs_t *fs = NULL; - int recognized = 1; - event_header_t *eh; - - /* - * We don't really call anything else in GFAPI, but this is the most - * convenient way to satisfy all of the spurious dependencies on how it - * or glusterfsd initialize (e.g. setting up THIS). - */ - if (!fs) { - fs = glfs_new ("dummy"); - } - - eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); - - /* TBD: check event_type instead of assuming NEW_REQUEST */ - - switch (eh->fop_type) { -@SWITCH_BODY@ - - default: - printf ("unknown fop %u\n", eh->fop_type); - recognized = 0; - } - - *old_meta = new_meta; - *old_data = new_data; - return recognized; -} diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c deleted file mode 100644 index fdcfafbac31..00000000000 --- a/xlators/experimental/fdl/src/fdl-tmpl.c +++ /dev/null @@ -1,506 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <fcntl.h> -#include <unistd.h> -#include <sys/mman.h> -#include "call-stub.h" -#include "iatt.h" -#include "defaults.h" -#include "syscall.h" -#include "xlator.h" -#include "jnl-types.h" - -/* TBD: make tunable */ -#define META_FILE_SIZE (1 << 20) -#define DATA_FILE_SIZE (1 << 24) - -enum gf_fdl { - gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, - gf_fdl_mt_end -}; - -typedef struct { - char *type; - off_t size; - char *path; - int fd; - void * ptr; - off_t max_offset; -} log_obj_t; - -typedef struct { - struct list_head reqs; - pthread_mutex_t req_lock; - pthread_cond_t req_cond; - char *log_dir; - pthread_t worker; - gf_boolean_t should_stop; - gf_boolean_t change_term; - log_obj_t meta_log; - log_obj_t data_log; - int term; - int first_term; -} fdl_private_t; - -void -fdl_enqueue (xlator_t *this, call_stub_t *stub) -{ - fdl_private_t *priv = this->private; - - pthread_mutex_lock (&priv->req_lock); - list_add_tail (&stub->list, &priv->reqs); - pthread_mutex_unlock (&priv->req_lock); - - pthread_cond_signal (&priv->req_cond); -} - -#pragma generate - -char * -fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) -{ - fdl_private_t *priv = this->private; - int ret; - char * ptr = NULL; - - /* - * Use .jnl instead of .log so that we don't get test info (mistakenly) - * appended to our journal files. - */ - if (this->ctx->cmd_args.log_ident) { - ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", - priv->log_dir, this->ctx->cmd_args.log_ident, - obj->type, term); - } - else { - ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", - priv->log_dir, obj->type, term); - } - if ((ret <= 0) || !obj->path) { - gf_log (this->name, GF_LOG_ERROR, - "failed to construct log-file path"); - goto err; - } - - gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", - obj->path, obj->size); - - obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); - if (obj->fd < 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to open log file (%s)", strerror(errno)); - goto err; - } - -#if !defined(GF_BSD_HOST_OS) - /* - * NetBSD can just go die in a fire. Even though it claims to support - * fallocate/posix_fallocate they don't actually *do* anything so the - * file size remains zero. Then mmap succeeds anyway, but any access - * to the mmap'ed region will segfault. It would be acceptable for - * fallocate to do what it says, for mmap to fail, or for access to - * extend the file. NetBSD managed to hit the trifecta of Getting - * Everything Wrong, and debugging in that environment to get this far - * has already been painful enough (systems I worked on in 1990 were - * better that way). We'll fall through to the lseek/write method, and - * performance will be worse, and TOO BAD. - */ - if (sys_fallocate(obj->fd,0,0,obj->size) < 0) -#endif - { - gf_log (this->name, GF_LOG_WARNING, - "failed to fallocate space for log file"); - /* Have to do this the ugly page-faulty way. */ - (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); - (void) sys_write (obj->fd, "", 1); - } - - ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); - if (ptr == MAP_FAILED) { - gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", - strerror(errno)); - goto err; - } - - obj->ptr = ptr; - obj->max_offset = 0; - return ptr; - -err: - if (obj->fd >= 0) { - sys_close (obj->fd); - obj->fd = (-1); - } - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; - } - return ptr; -} - -void -fdl_close_term_log (xlator_t *this, log_obj_t *obj) -{ - fdl_private_t *priv = this->private; - - if (obj->ptr) { - (void) munmap (obj->ptr, obj->size); - obj->ptr = NULL; - } - - if (obj->fd >= 0) { - gf_log (this->name, GF_LOG_INFO, - "truncating term %d %s journal to %ld", - priv->term, obj->type, obj->max_offset); - if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to truncate journal (%s)", - strerror(errno)); - } - sys_close (obj->fd); - obj->fd = (-1); - } - - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; - } -} - -gf_boolean_t -fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) -{ - fdl_private_t *priv = this->private; - - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); - - ++(priv->term); - - *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!*meta_ptr) { - return _gf_false; - } - - *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!*data_ptr) { - return _gf_false; - } - - return _gf_true; -} - -void * -fdl_worker (void *arg) -{ - xlator_t *this = arg; - fdl_private_t *priv = this->private; - call_stub_t *stub; - char * meta_ptr = NULL; - off_t *meta_offset = &priv->meta_log.max_offset; - char * data_ptr = NULL; - off_t *data_offset = &priv->data_log.max_offset; - unsigned long base_as_ul; - void * msync_ptr; - size_t msync_len; - gf_boolean_t recycle; - void *err_label = &&err_unlocked; - - priv->meta_log.type = "meta"; - priv->meta_log.size = META_FILE_SIZE; - priv->meta_log.path = NULL; - priv->meta_log.fd = (-1); - priv->meta_log.ptr = NULL; - - priv->data_log.type = "data"; - priv->data_log.size = DATA_FILE_SIZE; - priv->data_log.path = NULL; - priv->data_log.fd = (-1); - priv->data_log.ptr = NULL; - - /* TBD: initial term should come from persistent storage (e.g. etcd) */ - priv->first_term = ++(priv->term); - meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!meta_ptr) { - goto *err_label; - } - data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!data_ptr) { - fdl_close_term_log (this, &priv->meta_log); - goto *err_label; - } - - for (;;) { - pthread_mutex_lock (&priv->req_lock); - err_label = &&err_locked; - while (list_empty(&priv->reqs)) { - pthread_cond_wait (&priv->req_cond, &priv->req_lock); - if (priv->should_stop) { - goto *err_label; - } - if (priv->change_term) { - if (!fdl_change_term(this, &meta_ptr, - &data_ptr)) { - goto *err_label; - } - priv->change_term = _gf_false; - continue; - } - } - stub = list_entry (priv->reqs.next, call_stub_t, list); - list_del_init (&stub->list); - pthread_mutex_unlock (&priv->req_lock); - err_label = &&err_unlocked; - /* - * TBD: batch requests - * - * What we should do here is gather up *all* of the requests - * that have accumulated since we were last at this point, - * blast them all out in one big writev, and then dispatch them - * all before coming back for more. That maximizes throughput, - * at some cost to latency (due to queuing effects at the log - * stage). Note that we're likely to be above io-threads, so - * the dispatch itself will be parallelized (at further cost to - * latency). For now, we just do the simplest thing and handle - * one request all the way through before fetching the next. - * - * So, why mmap/msync instead of writev/fdatasync? Because it's - * faster. Much faster. So much faster that I half-suspect - * cheating, but it's more convenient for now than having to - * ensure that everything's page-aligned for O_DIRECT (the only - * alternative that still might avoid ridiculous levels of - * local-FS overhead). - * - * TBD: check that msync really does get our data to disk. - */ - gf_log (this->name, GF_LOG_DEBUG, - "logging %u+%u bytes for op %d", - stub->jnl_meta_len, stub->jnl_data_len, stub->fop); - recycle = _gf_false; - if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { - recycle = _gf_true; - } - if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { - recycle = _gf_true; - } - if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { - goto *err_label; - } - meta_ptr = priv->meta_log.ptr; - data_ptr = priv->data_log.ptr; - gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", - meta_ptr + *meta_offset, data_ptr + *data_offset); - stub->serialize (stub, meta_ptr + *meta_offset, - data_ptr + *data_offset); - if (stub->jnl_meta_len > 0) { - base_as_ul = (unsigned long) (meta_ptr + *meta_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_meta_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request meta (%s)", - strerror(errno)); - } - *meta_offset += stub->jnl_meta_len; - } - if (stub->jnl_data_len > 0) { - base_as_ul = (unsigned long) (data_ptr + *data_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_data_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request data (%s)", - strerror(errno)); - } - *data_offset += stub->jnl_data_len; - } - call_resume (stub); - } - -err_locked: - pthread_mutex_unlock (&priv->req_lock); -err_unlocked: - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); - return NULL; -} - -int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - fdl_private_t *priv = this->private; - dict_t *tdict; - int32_t gt_err = EIO; - - switch (op) { - - case FDL_IPC_CHANGE_TERM: - gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); - priv->change_term = _gf_true; - pthread_cond_signal (&priv->req_cond); - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - break; - - case FDL_IPC_GET_TERMS: - gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); - tdict = dict_new (); - if (!tdict) { - gt_err = ENOMEM; - goto gt_done; - } - if (dict_set_int32(tdict,"first",priv->first_term) != 0) { - goto gt_done; - } - if (dict_set_int32(tdict,"last",priv->term) != 0) { - goto gt_done; - } - gt_err = 0; - gt_done: - if (gt_err) { - STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); - } else { - STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); - } - if (tdict) { - dict_unref (tdict); - } - break; - - default: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - op, xdata); - } - - return 0; -} - -int -fdl_init (xlator_t *this) -{ - fdl_private_t *priv = NULL; - - priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); - if (!priv) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate fdl_private"); - goto err; - } - - INIT_LIST_HEAD (&priv->reqs); - if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_lock"); - goto err; - } - if (pthread_cond_init (&priv->req_cond, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_cond"); - goto err; - } - - GF_OPTION_INIT ("log-path", priv->log_dir, path, err); - - this->private = priv; - /* - * The rest of the fop table is automatically generated, so this is a - * bit cleaner than messing with the generation to add a hand-written - * exception. - */ - this->fops->ipc = fdl_ipc; - - if (pthread_create(&priv->worker,NULL,fdl_worker,this) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to start fdl_worker"); - goto err; - } - - return 0; - -err: - if (priv) { - GF_FREE(priv); - } - return -1; -} - -void -fdl_fini (xlator_t *this) -{ - fdl_private_t *priv = this->private; - - if (priv) { - priv->should_stop = _gf_true; - pthread_cond_signal (&priv->req_cond); - pthread_join (priv->worker, NULL); - GF_FREE(priv); - } -} - -int -fdl_reconfigure (xlator_t *this, dict_t *options) -{ - fdl_private_t *priv = this->private; - - GF_OPTION_RECONF ("log_dir", priv->log_dir, options, path, out); - /* TBD: react if it changed */ - -out: - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("fdl", this, out); - - ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } -out: - return ret; -} - -class_methods_t class_methods = { - .init = fdl_init, - .fini = fdl_fini, - .reconfigure = fdl_reconfigure, - .notify = default_notify, -}; - -struct volume_options options[] = { - { .key = {"log-path"}, - .type = GF_OPTION_TYPE_PATH, - .default_value = DEFAULT_LOG_FILE_DIRECTORY, - .description = "Directory for FDL files." - }, - { .key = {NULL} }, -}; - -struct xlator_cbks cbks = { - .release = default_release, - .releasedir = default_releasedir, - .forget = default_forget, -}; diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py deleted file mode 100755 index 42db55d2cb3..00000000000 --- a/xlators/experimental/fdl/src/gen_dumper.py +++ /dev/null @@ -1,116 +0,0 @@ -#!/usr/bin/python - -import os -import re -import sys - -curdir = os.path.dirname (sys.argv[0]) -gendir = os.path.join (curdir, '../../../../libglusterfs/src') -sys.path.append (gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# See the big header comment at the start of gen_fdl.py to see how the stages -# fit together. The big difference here is that *all* of the C code is in the -# template file as labelled fragments, instead of as Python strings. That -# makes it much easier to edit in one place, with proper syntax highlighting -# and indentation. -# -# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of -# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. -# -# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and -# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution -# in the middle of each function) is emitted immediately; the expanded CASE -# code is saved for the next stage. -# -# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code -# in the middle of EPILOG, to generate the whole output file. -# -# Another way of looking at it is to consider how the fragments appear in -# the final output: -# -# PROLOG -# FOP (expanded for CREATE) -# FOP before FUNCTION_BODY -# LOC, INTEGER, GFID, etc. (one per arg, by type) -# FOP after FUNCTION_BODY -# FOP (expanded for WRITEV) -# FOP before FUNCTION_BODY -# GFID, VECTOR, etc. (on per arg, by type) -# FOP after FUNCTION_BODY -# (more FOPs) -# EPILOG -# EPILOG before CASE -# CASE statements (one per fop) -# EPILOG after CASE - -typemap = { - 'dict_t *': ( "DICT", ""), - 'fd_t *': ( "GFID", ""), - 'dev_t': ( "DOUBLE", "%ld (0x%lx)"), - 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"), - 'int32_t': ( "INTEGER", "%d (0x%x)"), - 'mode_t': ( "INTEGER", "%d (0x%x)"), - 'off_t': ( "DOUBLE", "%ld (0x%lx)"), - 'size_t': ( "DOUBLE", "%ld (0x%lx)"), - 'uint32_t': ( "INTEGER", "%d (0x%x)"), - 'loc_t *': ( "LOC", ""), - 'const char *': ( "STRING", ""), - 'struct iovec *': ( "VECTOR", ""), - 'struct iatt *': ( "IATT", ""), -} - -def get_special_subs (args): - code = "" - for arg in args: - if (arg[0] != 'fop-arg') or (len(arg) < 4): - continue - recon_type, recon_fmt = typemap[arg[2]] - code += fragments[recon_type].replace("@ARGNAME@",arg[3]) \ - .replace("@FORMAT@",recon_fmt) - return code - -def gen_functions (): - code = "" - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value) - # Print the FOP fragment with @FUNCTION_BODY@ in the middle. - code += generate(fragments["FOP"],name,fop_subs) - return code - -def gen_cases (): - code = "" - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - # Add the CASE fragment for this fop. - code += generate(fragments["CASE"],name,fop_subs) - return code - -def load_fragments (path="recon-tmpl.c"): - pragma_re = re.compile('pragma fragment (.*)') - cur_symbol = None - cur_value = "" - result = {} - for line in open(path,"r").readlines(): - m = pragma_re.search(line) - if m: - if cur_symbol: - result[cur_symbol] = cur_value - cur_symbol = m.group(1) - cur_value = "" - else: - cur_value += line - if cur_symbol: - result[cur_symbol] = cur_value - return result - -if __name__ == "__main__": - fragments = load_fragments(sys.argv[1]) - print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" - print fragments["PROLOG"] - print gen_functions() - print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases()) - print "/* END GENERATED CODE */" diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py deleted file mode 100755 index 7f6b1aaaeaa..00000000000 --- a/xlators/experimental/fdl/src/gen_fdl.py +++ /dev/null @@ -1,328 +0,0 @@ -#!/usr/bin/python - -import os -import sys - -curdir = os.path.dirname (sys.argv[0]) -gendir = os.path.join (curdir, '../../../../libglusterfs/src') -sys.path.append (gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# Generation occurs in three stages. In this case, it actually makes more -# sense to discuss them in the *opposite* order of that in which they -# actually happen. -# -# Stage 3 is to insert all of the generated code into a file, replacing the -# "#pragma generate" that's already there. The file can thus contain all -# sorts of stuff that's not specific to one fop, either before or after the -# generated code as appropriate. -# -# Stage 2 is to generate all of the code *for a particular fop*, using a -# string-valued template plus a table of substitution values. Most of these -# are built in to the generator itself. However, we also add a couple that -# are specific to this particular translator - LEN_CODE and SER_CODE. These -# are per-fop functions to get the length or the contents (respectively) of -# what we'll put in the log. As with stage 3 allowing per-file boilerplate -# before and after generated code, this allows per-fop boilerplate before and -# after generated code. -# -# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for -# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS -# already are. We do this by looking at the fop-description table in the -# generator module, then doing out own template substitution to plug each -# specific argument name into another string-valued template. -# -# So, what does this leave us with in terms of variables and files? -# -# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE -# strings, which are used to generate the length and serialization code for -# each argument type. -# -# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_ -# prefix), which are used (along with the output from stage 1) to generate -# whole functions. -# -# For stage 3, we have a whole separate file (fdl_tmpl.c) into which we insert -# the collection of all functions defined in stage 2. - - -LEN_TEMPLATE = """ -void -fdl_len_@NAME@ (call_stub_t *stub) -{ - uint32_t meta_len = sizeof (event_header_t); - uint32_t data_len = 0; - - /* TBD: global stuff, e.g. uid/gid */ -@LEN_CODE@ - - /* TBD: pad extension length */ - stub->jnl_meta_len = meta_len; - stub->jnl_data_len = data_len; -} -""" - -SER_TEMPLATE = """ -void -fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf) -{ - event_header_t *eh; - unsigned long offset = 0; - - /* TBD: word size/endianness */ - eh = (event_header_t *)meta_buf; - eh->event_type = NEW_REQUEST; - eh->fop_type = GF_FOP_@UPNAME@; - eh->request_id = 0; // TBD - meta_buf += sizeof (*eh); -@SER_CODE@ - /* TBD: pad extension length */ - eh->ext_length = offset; -} -""" - -CBK_TEMPLATE = """ -int32_t -fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - return 0; -} -""" - -CONTINUE_TEMPLATE = """ -int32_t -fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - STACK_WIND (frame, fdl_@NAME@_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; -} - -""" - -FOP_TEMPLATE = """ -int32_t -fdl_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - call_stub_t *stub; - - stub = fop_@NAME@_stub (frame, default_@NAME@, - @SHORT_ARGS@); - fdl_len_@NAME@ (stub); - stub->serialize = fdl_serialize_@NAME@; - fdl_enqueue (this, stub); - - return 0; -} -""" - -LEN_DICT_TEMPLATE = """ - if (@SRC@) { - data_pair_t *memb; - for (memb = @SRC@->members_list; memb; memb = memb->next) { - meta_len += sizeof(int); - meta_len += strlen(memb->key) + 1; - meta_len += sizeof(int); - meta_len += memb->value->len; - } - } - meta_len += sizeof(int); -""" - -LEN_GFID_TEMPLATE = """ - meta_len += 16; -""" - -LEN_INTEGER_TEMPLATE = """ - meta_len += sizeof (@SRC@); -""" - -# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL -LEN_LOC_TEMPLATE = """ - if (@SRC@.name) { - meta_len += (strlen (@SRC@.name) + 34); - } else { - meta_len += 33; - } -""" - -LEN_STRING_TEMPLATE = """ - if (@SRC@) { - meta_len += (strlen (@SRC@) + 1); - } else { - meta_len += 1; - } -""" - -LEN_VECTOR_TEMPLATE = """ - meta_len += sizeof(size_t); - data_len += iov_length (@VEC@, @CNT@); -""" - -LEN_IATT_TEMPLATE = """ - meta_len += sizeof(@SRC@.ia_prot); - meta_len += sizeof(@SRC@.ia_uid); - meta_len += sizeof(@SRC@.ia_gid); - meta_len += sizeof(@SRC@.ia_atime); - meta_len += sizeof(@SRC@.ia_atime_nsec); - meta_len += sizeof(@SRC@.ia_mtime); - meta_len += sizeof(@SRC@.ia_mtime_nsec); -""" - -SERLZ_DICT_TEMPLATE = """ - if (@SRC@) { - data_pair_t *memb; - for (memb = @SRC@->members_list; memb; memb = memb->next) { - *((int *)(meta_buf+offset)) = strlen(memb->key) + 1; - offset += sizeof(int); - strcpy (meta_buf+offset, memb->key); - offset += strlen(memb->key) + 1; - *((int *)(meta_buf+offset)) = memb->value->len; - offset += sizeof(int); - memcpy (meta_buf+offset, memb->value->data, memb->value->len); - offset += memb->value->len; - } - } - *((int *)(meta_buf+offset)) = 0; - offset += sizeof(int); -""" - -SERLZ_GFID_TEMPLATE = """ - memcpy (meta_buf+offset, @SRC@->inode->gfid, 16); - offset += 16; -""" - -SERLZ_INTEGER_TEMPLATE = """ - memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@)); - offset += sizeof(@SRC@); -""" - -SERLZ_LOC_TEMPLATE = """ - memcpy (meta_buf+offset, @SRC@.gfid, 16); - offset += 16; - memcpy (meta_buf+offset, @SRC@.pargfid, 16); - offset += 16; - if (@SRC@.name) { - *(meta_buf+offset) = 1; - ++offset; - strcpy (meta_buf+offset, @SRC@.name); - offset += (strlen (@SRC@.name) + 1); - } else { - *(meta_buf+offset) = 0; - ++offset; - } -""" - -SERLZ_STRING_TEMPLATE = """ - if (@SRC@) { - *(meta_buf+offset) = 1; - ++offset; - strcpy (meta_buf+offset, @SRC@); - offset += strlen(@SRC@); - } else { - *(meta_buf+offset) = 0; - ++offset; - } -""" - -SERLZ_VECTOR_TEMPLATE = """ - *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@); - offset += sizeof(size_t); - int32_t i; - for (i = 0; i < @CNT@; ++i) { - memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len); - data_buf += @VEC@[i].iov_len; - } -""" - -# We don't need to save all of the fields - only those affected by chown, -# chgrp, chmod, and utime. -SERLZ_IATT_TEMPLATE = """ - *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot; - offset += sizeof(@SRC@.ia_prot); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid; - offset += sizeof(@SRC@.ia_uid); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid; - offset += sizeof(@SRC@.ia_gid); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime; - offset += sizeof(@SRC@.ia_atime); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec; - offset += sizeof(@SRC@.ia_atime_nsec); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime; - offset += sizeof(@SRC@.ia_mtime); - *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec; - offset += sizeof(@SRC@.ia_mtime_nsec); -""" - -typemap = { - 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE), - 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE), - 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), - 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE), - 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE), - 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE), -} - -def get_special_subs (args): - len_code = "" - ser_code = "" - for arg in args: - if (arg[0] != 'fop-arg') or (len(arg) < 4): - continue - # Let this throw an exception if we get an unknown field name. The - # broken build will remind whoever messed with the stub code that a - # corresponding update is needed here. - if arg[3] == "vector": - # Make it as obvious as possible that this is a special case. - len_code += LEN_VECTOR_TEMPLATE \ - .replace("@VEC@","stub->args.vector") \ - .replace("@CNT@","stub->args.count") - ser_code += SERLZ_VECTOR_TEMPLATE \ - .replace("@VEC@","stub->args.vector") \ - .replace("@CNT@","stub->args.count") - else: - len_tmpl, ser_tmpl = typemap[arg[2]] - src = "stub->args.%s" % arg[3] - len_code += len_tmpl.replace("@SRC@",src) - ser_code += ser_tmpl.replace("@SRC@",src) - return len_code, ser_code - -def gen_fdl (): - entrypoints = [] - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - len_code, ser_code = get_special_subs(value) - fop_subs[name]["@LEN_CODE@"] = len_code[:-1] - fop_subs[name]["@SER_CODE@"] = ser_code[:-1] - print generate(LEN_TEMPLATE,name,fop_subs) - print generate(SER_TEMPLATE,name,fop_subs) - print generate(CBK_TEMPLATE,name,cbk_subs) - print generate(CONTINUE_TEMPLATE,name,fop_subs) - print generate(FOP_TEMPLATE,name,fop_subs) - entrypoints.append(name) - print "struct xlator_fops fops = {" - for ep in entrypoints: - print "\t.%s = fdl_%s," % (ep, ep) - print "};" - -for l in open(sys.argv[1],'r').readlines(): - if l.find('#pragma generate') != -1: - print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" - gen_fdl() - print "/* END GENERATED CODE */" - else: - print l[:-1] diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py deleted file mode 100755 index 67f9ea9ebd3..00000000000 --- a/xlators/experimental/fdl/src/gen_recon.py +++ /dev/null @@ -1,213 +0,0 @@ -#!/usr/bin/python - -import os -import re -import string -import sys - -curdir = os.path.dirname (sys.argv[0]) -gendir = os.path.join (curdir, '../../../../libglusterfs/src') -sys.path.append (gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# See the big header comment at the start of gen_fdl.py to see how the stages -# fit together. The big difference here is that *all* of the C code is in the -# template file as labelled fragments, instead of as Python strings. That -# makes it much easier to edit in one place, with proper syntax highlighting -# and indentation. -# -# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of -# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. -# -# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and -# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution -# in the middle of each function) is emitted immediately; the expanded CASE -# code is saved for the next stage. -# -# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code -# in the middle of EPILOG, to generate the whole output file. -# -# Another way of looking at it is to consider how the fragments appear in -# the final output: -# -# PROLOG -# FOP (expanded for CREATE) -# FOP before FUNCTION_BODY -# LOC, INTEGER, GFID, etc. (one per arg, by type) -# FOP after FUNCTION_BODY -# FOP (expanded for WRITEV) -# FOP before FUNCTION_BODY -# GFID, VECTOR, etc. (one per arg, by type) -# FOP after FUNCTION_BODY -# (more FOPs) -# EPILOG -# EPILOG before CASE -# CASE statements (one per fop) -# EPILOG after CASE - -typemap = { - 'dict_t *': "DICT", - 'fd_t *': "FD", - 'dev_t': "DOUBLE", - 'gf_xattrop_flags_t': "INTEGER", - 'int32_t': "INTEGER", - 'mode_t': "INTEGER", - 'off_t': "DOUBLE", - 'size_t': "DOUBLE", - 'uint32_t': "INTEGER", - 'loc_t *': "LOC", - 'const char *': "STRING", - 'struct iovec *': "VECTOR", - 'struct iatt *': "IATT", - 'struct iobref *': "IOBREF", -} - -def get_special_subs (name, args, fop_type): - code = "" - cleanups = "" - links = "" - s_args = [] - for arg in args: - if arg[0] == 'extra': - code += "\t%s %s;\n\n" % (arg[2], arg[1]) - s_args.append(arg[3]) - continue - if arg[0] == 'link': - links += fragments["LINK"].replace("@INODE_ARG@",arg[1]) \ - .replace("@IATT_ARG@",arg[2]) - continue - if arg[0] != 'fop-arg': - continue - if (name, arg[1]) == ('writev', 'count'): - # Special case: just skip this. We can't mark it as 'nosync' - # because of the way the translator and dumper generators look for - # that after 'stub-name' which we don't define. Instead of adding a - # bunch of generic infrastructure for this one case, just pound it - # here. - continue - recon_type = typemap[arg[2]] - # print "/* %s.%s => %s (%s)*/" % (name, arg[1], recon_type, fop_type) - if (name == "create") and (arg[1] == "fd"): - # Special case: fd for create is new, not looked up. - # print "/* change to NEW_FD */" - recon_type = "NEW_FD" - elif (recon_type == "LOC") and (fop_type == "entry-op"): - # Need to treat this differently for inode vs. entry ops. - # Special case: link source is treated like inode-op. - if (name != "link") or (arg[1] != "oldloc"): - # print "/* change to PARENT_LOC */" - recon_type = "PARENT_LOC" - code += fragments[recon_type].replace("@ARGNAME@",arg[1]) \ - .replace("@ARGTYPE@",arg[2]) - cleanup_key = recon_type + "_CLEANUP" - if fragments.has_key(cleanup_key): - new_frag = fragments[cleanup_key].replace("@ARGNAME@",arg[1]) - # Make sure these get added in *reverse* order. Otherwise, a - # failure for an earlier argument might goto a label that falls - # through to the cleanup code for a variable associated with a - # later argument, but that variable might not even have been - # *declared* (let alone initialized) yet. Consider the following - # case. - # - # process argument A (on failure goto cleanup_A) - # set error label to cleanup_A - # - # declare pointer variable for argument B - # process argument B (on failure goto cleanup_B) - # - # cleanup_A: - # /* whatever */ - # cleanup_B: - # free pointer variable <= "USED BUT NOT SET" error here - # - # By adding these in reverse order, we ensure that cleanup_B is - # actually *before* cleanup_A, and nothing will try to do the free - # until we've actually attempted processing of B. - cleanups = new_frag + cleanups - if 'nosync' in arg[4:]: - code += "\t(void)%s;\n" % arg[1]; - continue - if arg[2] in ("loc_t *", "struct iatt *"): - # These are passed as pointers to the syncop, but they're actual - # structures in the generated code. - s_args.append("&"+arg[1]); - else: - s_args.append(arg[1]) - # We have to handle a couple of special cases here, because some n00b - # defined the syncops with a different argument order than the fops they're - # based on. - if name == 'writev': - # Swap 'flags' and 'iobref'. Also, we need to add the iov count, which - # is not stored in or read from the journal. There are other ways to - # do that, but this is the only place we need anything similar and we - # already have to treat it as a special case so this is simplest. - s_args_str = 'fd, &vector, 1, off, iobref, flags, xdata' - elif name == 'symlink': - # Swap 'linkpath' and 'loc'. - s_args_str = '&loc, linkpath, &iatt, xdata' - else: - s_args_str = string.join (s_args, ", ") - return code, links, s_args_str, cleanups - -# TBD: probably need to generate type-specific cleanup code as well - e.g. -# fd_unref for an fd_t, loc_wipe for a loc_t, and so on. All of these -# generated CLEANUP fragments will go at the end of the function, with goto -# labels. Meanwhile, the error-checking part of each type-specific fragment -# (e.g. LOC or FD) will need to update the indirect label that we jump to when -# an error is detected. This will probably get messy. -def gen_functions (): - code = "" - for name, value in ops.iteritems(): - fop_type = [ x[1] for x in value if x[0] == "journal" ] - if not fop_type: - continue - body, links, syncop_args, cleanups = get_special_subs (name, value, - fop_type[0]) - fop_subs[name]["@FUNCTION_BODY@"] = body - fop_subs[name]["@LINKS@"] = links - fop_subs[name]["@SYNCOP_ARGS@"] = syncop_args - fop_subs[name]["@CLEANUPS@"] = cleanups - if name == "writev": - # Take advantage of the fact that, *during reconciliation*, the - # vector is always a single element. In normal I/O it's not. - fop_subs[name]["@SUCCESS_VALUE@"] = "vector.iov_len" - else: - fop_subs[name]["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS" - # Print the FOP fragment with @FUNCTION_BODY@ in the middle. - code += generate(fragments["FOP"],name,fop_subs) - return code - -def gen_cases (): - code = "" - for name, value in ops.iteritems(): - if "journal" not in [ x[0] for x in value ]: - continue - # Add the CASE fragment for this fop. - code += generate(fragments["CASE"],name,fop_subs) - return code - -def load_fragments (path="recon-tmpl.c"): - pragma_re = re.compile('pragma fragment (.*)') - cur_symbol = None - cur_value = "" - result = {} - for line in open(path,"r").readlines(): - m = pragma_re.search(line) - if m: - if cur_symbol: - result[cur_symbol] = cur_value - cur_symbol = m.group(1) - cur_value = "" - else: - cur_value += line - if cur_symbol: - result[cur_symbol] = cur_value - return result - -if __name__ == "__main__": - fragments = load_fragments(sys.argv[1]) - print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" - print fragments["PROLOG"] - print gen_functions() - print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases()) - print "/* END GENERATED CODE */" diff --git a/xlators/experimental/fdl/src/jnl-types.h b/xlators/experimental/fdl/src/jnl-types.h deleted file mode 100644 index 8cb39d01a25..00000000000 --- a/xlators/experimental/fdl/src/jnl-types.h +++ /dev/null @@ -1,14 +0,0 @@ -#define NEW_REQUEST (uint8_t)'N' - -typedef struct { - uint8_t event_type; /* e.g. NEW_REQUEST */ - uint8_t fop_type; /* e.g. GF_FOP_SETATTR */ - uint16_t request_id; - uint32_t ext_length; -} event_header_t; - -enum { - FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */ - FDL_IPC_CHANGE_TERM, - FDL_IPC_GET_TERMS, -}; diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c deleted file mode 100644 index 7c979c32a04..00000000000 --- a/xlators/experimental/fdl/src/logdump.c +++ /dev/null @@ -1,50 +0,0 @@ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/mman.h> - -extern int fdl_dump (char **, char **); - -int -main (int argc, char **argv) -{ - int meta_fd = (-1); - char *meta_buf = NULL; - int data_fd = (-1); - char *data_buf = NULL; - - meta_fd = open (argv[1], O_RDONLY); - if (meta_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); - if (meta_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - data_fd = open (argv[2], O_RDONLY); - if (data_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); - if (data_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - for (;;) { - if (!fdl_dump(&meta_buf,&data_buf)) { - break; - } - } - - return EXIT_SUCCESS; -} diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c deleted file mode 100644 index 523bda39418..00000000000 --- a/xlators/experimental/fdl/src/recon-tmpl.c +++ /dev/null @@ -1,305 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "fd.h" -#include "iatt.h" -#include "syncop.h" -#include "xlator.h" -#include "glfs-internal.h" - -#include "jnl-types.h" - -#define GFAPI_SUCCESS 0 - -inode_t * -recon_get_inode (glfs_t *fs, uuid_t gfid) -{ - inode_t *inode; - loc_t loc = {NULL,}; - struct iatt iatt; - int ret; - inode_t *newinode; - - inode = inode_find (fs->active_subvol->itable, gfid); - if (inode) { - printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); - return inode; - } - - loc.inode = inode_new (fs->active_subvol->itable); - if (!loc.inode) { - return NULL; - } - gf_uuid_copy (loc.inode->gfid, gfid); - gf_uuid_copy (loc.gfid, gfid); - - printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); - - ret = syncop_lookup (fs->active_subvol, &loc, &iatt, - NULL, NULL, NULL); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "syncop_lookup failed (%d)\n", ret); - return NULL; - } - - newinode = inode_link (loc.inode, NULL, NULL, &iatt); - if (newinode) { - inode_lookup (newinode); - } - - return newinode; -} - -#pragma fragment DICT - dict_t *@ARGNAME@; - - @ARGNAME@ = dict_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - - { - int key_len, data_len; - char *key_ptr; - int garbage; - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - new_meta += sizeof(int); - garbage = dict_set_static_bin (@ARGNAME@, key_ptr, - new_meta, data_len); - /* TBD: check error from dict_set_static_bin */ - (void)garbage; - new_meta += data_len; - } - } - -#pragma fragment DICT_CLEANUP -cleanup_@ARGNAME@: - dict_unref (@ARGNAME@); - -#pragma fragment DOUBLE - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); - new_meta += sizeof(uint64_t); - -#pragma fragment FD - inode_t *@ARGNAME@_ino; - fd_t *@ARGNAME@; - - @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); - new_meta += 16; - if (!@ARGNAME@_ino) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@_ino; - - @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - -#pragma fragment FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); -cleanup_@ARGNAME@_ino: - inode_unref (@ARGNAME@_ino); - -#pragma fragment NEW_FD - /* - * This pseudo-type is only used for create, and in that case we know - * we'll be using loc.inode, so it's not worth generalizing to take an - * extra argument. - */ - fd_t *@ARGNAME@ = fd_anonymous (loc.inode); - - if (!fd) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - new_meta += 16; - -#pragma fragment NEW_FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); - -#pragma fragment INTEGER - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); - - new_meta += sizeof(@ARGTYPE@); - -#pragma fragment LOC - loc_t @ARGNAME@ = { NULL, }; - - @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.inode) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); - new_meta += 16; - new_meta += 16; /* skip over pargfid */ - if (*(new_meta++)) { - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; - } - -#pragma fragment LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); - -#pragma fragment PARENT_LOC - loc_t @ARGNAME@ = { NULL, }; - - new_meta += 16; /* skip over gfid */ - @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.parent) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); - new_meta += 16; - if (!*(new_meta++)) { - goto *err_label; - } - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; - - @ARGNAME@.inode = inode_new (fs->active_subvol->itable); - if (!@ARGNAME@.inode) { - goto *err_label; - } - -#pragma fragment PARENT_LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); - -#pragma fragment STRING - char *@ARGNAME@; - if (*(new_meta++)) { - @ARGNAME@ = new_meta; - new_meta += (strlen(new_meta) + 1); - } - else { - goto *err_label; - } - -#pragma fragment VECTOR - struct iovec @ARGNAME@; - - @ARGNAME@.iov_len = *((size_t *)new_meta); - new_meta += sizeof(@ARGNAME@.iov_len); - @ARGNAME@.iov_base = new_data; - new_data += @ARGNAME@.iov_len; - -#pragma fragment IATT - struct iatt @ARGNAME@; - { - @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - @ARGNAME@.ia_uid = myints[0]; - @ARGNAME@.ia_gid = myints[1]; - @ARGNAME@.ia_atime = myints[2]; - @ARGNAME@.ia_atime_nsec = myints[3]; - @ARGNAME@.ia_mtime = myints[4]; - @ARGNAME@.ia_mtime_nsec = myints[5]; - new_meta += sizeof(*myints) * 6; - } - -#pragma fragment IOBREF - struct iobref *@ARGNAME@; - - @ARGNAME@ = iobref_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - -#pragma fragment IOBREF_CLEANUP -cleanup_@ARGNAME@: - iobref_unref (@ARGNAME@); - -#pragma fragment LINK - /* TBD: check error */ - inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); - if (new_inode) { - inode_lookup (new_inode); - } - -#pragma fragment FOP -int -fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - int ret; - int status = 0xbad; - void *err_label = &&done; - -@FUNCTION_BODY@ - - ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); - if (ret != @SUCCESS_VALUE@) { - fprintf (stderr, "syncop_@NAME@ returned %d", ret); - goto *err_label; - } - -@LINKS@ - - status = 0; - -@CLEANUPS@ - -done: - *old_meta = new_meta; - *old_data = new_data; - return status; -} - -#pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { - goto done; - } - recognized = 1; - break; - -#pragma fragment EPILOG -int -recon_execute (glfs_t *fs, char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - int recognized = 0; - event_header_t *eh; - - eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); - - /* TBD: check event_type instead of assuming NEW_REQUEST */ - - switch (eh->fop_type) { -@SWITCH_BODY@ - - default: - printf ("unknown fop %u\n", eh->fop_type); - } - -done: - *old_meta = new_meta; - *old_data = new_data; - return recognized; -} diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c deleted file mode 100644 index 14168a011e0..00000000000 --- a/xlators/experimental/fdl/src/recon.c +++ /dev/null @@ -1,89 +0,0 @@ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/mman.h> - -#include "glusterfs.h" -#include "fd.h" -#include "syncop.h" -#include "glfs-internal.h" - -#define GFAPI_SUCCESS 0 - -extern int recon_execute (glfs_t *, char **, char **); - -int -main (int argc, char **argv) -{ - glfs_t *fs; - int ret; - int meta_fd = (-1); - char *meta_buf = NULL; - int data_fd = (-1); - char *data_buf = NULL; - - fs = glfs_new ("whocares"); - if (!fs) { - fprintf (stderr, "glfs_new failed\n"); - return EXIT_FAILURE; - } - - if (getenv("RECON_DEBUG")) { - ret = glfs_set_logging (fs, "/dev/stderr", 7); - } - else { - ret = glfs_set_logging (fs, "/dev/null", 0); - } - - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_set_logging failed (%d)\n", errno); - return EXIT_FAILURE; - } - - ret = glfs_set_volfile (fs, argv[1]); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno); - return EXIT_FAILURE; - } - - ret = glfs_init (fs); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_init failed (%d)\n", errno); - return EXIT_FAILURE; - } - - meta_fd = open (argv[2], O_RDONLY); - if (meta_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); - if (meta_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - data_fd = open (argv[3], O_RDONLY); - if (data_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); - if (data_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - for (;;) { - if (!recon_execute(fs,&meta_buf,&data_buf)) { - break; - } - } - - return EXIT_SUCCESS; -} diff --git a/xlators/experimental/jbr-client/Makefile.am b/xlators/experimental/jbr-client/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/jbr-client/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/jbr-client/src/Makefile.am b/xlators/experimental/jbr-client/src/Makefile.am deleted file mode 100644 index 58f399f0607..00000000000 --- a/xlators/experimental/jbr-client/src/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -xlator_LTLIBRARIES = jbrc.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental - -nodist_jbrc_la_SOURCES = jbrc-cg.c -CLEANFILES = $(nodist_jbrc_la_SOURCES) - -jbrc_la_LDFLAGS = -module -avoid-version -jbrc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -noinst_HEADERS = \ - $(top_srcdir)/xlators/lib/src/libxlator.h \ - $(top_srcdir)/glusterfsd/src/glusterfsd.h \ - jbrc.h jbr-messages.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) \ - -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \ - -I$(top_srcdir)/rpc/rpc-lib/src - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -JBRC_PREFIX = $(top_srcdir)/xlators/experimental/jbr-client/src -JBRC_GEN_FOPS = $(JBRC_PREFIX)/gen-fops.py -JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c -JBRC_WRAPPER = $(JBRC_PREFIX)/jbrc.c -noinst_PYTHON = $(JBRC_GEN_FOPS) -EXTRA_DIST = $(JBRC_TEMPLATES) $(JBRC_WRAPPER) - -jbrc-cg.c: $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER) - $(PYTHON) $(JBRC_GEN_FOPS) $(JBRC_TEMPLATES) $(JBRC_WRAPPER) > $@ - -uninstall-local: - rm -f $(DESTDIR)$(xlatordir)/jbr.so diff --git a/xlators/experimental/jbr-client/src/fop-template.c b/xlators/experimental/jbr-client/src/fop-template.c deleted file mode 100644 index 7719f511f01..00000000000 --- a/xlators/experimental/jbr-client/src/fop-template.c +++ /dev/null @@ -1,113 +0,0 @@ -/* template-name fop */ -int32_t -jbrc_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbrc_local_t *local = NULL; - xlator_t *target_xl = ACTIVE_CHILD(this); - - local = mem_get(this->local_pool); - if (!local) { - goto err; - } - - local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; - } - local->curr_xl = target_xl; - local->scars = 0; - - frame->local = local; - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl, - target_xl, target_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; - -err: - if (local) { - mem_put(local); - } - STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM, - @ERROR_ARGS@); - return 0; -} - -/* template-name cbk */ -int32_t -jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - jbrc_local_t *local = frame->local; - xlator_t *last_xl = cookie; - xlator_t *next_xl; - jbrc_private_t *priv = this->private; - struct timespec spec; - - if (op_ret != (-1)) { - if (local->scars) { - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retried %p OK"), frame->local); - } - priv->active = last_xl; - goto unwind; - } - if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { - goto unwind; - } - - /* TBD: get leader ID from xdata? */ - next_xl = next_xlator(this, last_xl); - /* - * We can't just give up after we've tried all bricks, because it's - * quite likely that a new leader election just hasn't finished yet. - * We also shouldn't retry endlessly, and especially not at a high - * rate, but that's good enough while we work on other things. - * - * TBD: implement slow/finite retry via a worker thread - */ - if (!next_xl || (local->scars >= SCAR_LIMIT)) { - gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, - HILITE("ran out of retries for %p"), frame->local); - goto unwind; - } - - local->curr_xl = next_xl; - local->scars += 1; - spec.tv_sec = 1; - spec.tv_nsec = 0; - /* - * WARNING - * - * Just calling gf_timer_call_after like this leaves open the - * possibility that writes will get reordered, if a first write is - * rescheduled and then a second comes along to find an updated - * priv->active before the first actually executes. We might need to - * implement a stricter (and more complicated) queuing mechanism to - * ensure absolute consistency in this case. - */ - if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { - return 0; - } - -unwind: - call_stub_destroy(local->stub); - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - return 0; -} - -/* template-name cont-func */ -int32_t -jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbrc_local_t *local = frame->local; - - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl, - local->curr_xl, local->curr_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; -} diff --git a/xlators/experimental/jbr-client/src/gen-fops.py b/xlators/experimental/jbr-client/src/gen-fops.py deleted file mode 100755 index 4d9451f7177..00000000000 --- a/xlators/experimental/jbr-client/src/gen-fops.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/python - -import os -import re -import string -import sys - -curdir = os.path.dirname(sys.argv[0]) -gendir = os.path.join(curdir,'../../../../libglusterfs/src') -sys.path.append(gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# We really want the callback argument list, even when we're generating fop -# code, so we propagate here. -# TBD: this should probably be right in generate.py -for k, v in cbk_subs.iteritems(): - fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@'] - -# Stolen from old codegen.py -def load_templates (path): - templates = {} - tmpl_re = re.compile("/\* template-name (.*) \*/") - templates = {} - t_name = None - for line in open(path,"r").readlines(): - if not line: - break - m = tmpl_re.match(line) - if m: - if t_name: - templates[t_name] = string.join(t_contents,'') - t_name = m.group(1).strip() - t_contents = [] - elif t_name: - t_contents.append(line) - if t_name: - templates[t_name] = string.join(t_contents,'') - return templates - -# Stolen from gen_fdl.py -def gen_client (templates): - for name, value in ops.iteritems(): - if name == 'getspec': - # It's not real if it doesn't have a stub function. - continue - print generate(templates['cbk'],name,cbk_subs) - print generate(templates['cont-func'],name,fop_subs) - print generate(templates['fop'],name,fop_subs) - -tmpl = load_templates(sys.argv[1]) -for l in open(sys.argv[2],'r').readlines(): - if l.find('#pragma generate') != -1: - print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" - gen_client(tmpl) - print "/* END GENERATED CODE */" - else: - print l[:-1] diff --git a/xlators/experimental/jbr-client/src/jbr-messages.h b/xlators/experimental/jbr-client/src/jbr-messages.h deleted file mode 100644 index 61fa725d56a..00000000000 --- a/xlators/experimental/jbr-client/src/jbr-messages.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _JBR_MESSAGES_H_ -#define _JBR_MESSAGES_H_ - -#include "glfs-message-id.h" - -/* NOTE: Rules for message additions - * 1) Each instance of a message is _better_ left with a unique message ID, even - * if the message format is the same. Reasoning is that, if the message - * format needs to change in one instance, the other instances are not - * impacted or the new change does not change the ID of the instance being - * modified. - * 2) Addition of a message, - * - Should increment the GLFS_NUM_MESSAGES - * - Append to the list of messages defined, towards the end - * - Retain macro naming as glfs_msg_X (for redability across developers) - * NOTE: Rules for message format modifications - * 3) Check acorss the code if the message ID macro in question is reused - * anywhere. If reused then then the modifications should ensure correctness - * everywhere, or needs a new message ID as (1) above was not adhered to. If - * not used anywhere, proceed with the required modification. - * NOTE: Rules for message deletion - * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used - * anywhere, then can be deleted, but will leave a hole by design, as - * addition rules specify modification to the end of the list and not filling - * holes. - */ - -#define JBR_COMP_BASE GLFS_MSGID_COMP_JBR -#define GLFS_NUM_MESSAGES 1 -#define GLFS_MSGID_END (JBR_COMP_BASE + GLFS_NUM_MESSAGES + 1) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_INIT_FAIL (JBR_COMP_BASE + 1) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_RETRY_MSG (JBR_COMP_BASE + 2) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_MEM_ERR (JBR_COMP_BASE + 3) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_DICT_FLR (JBR_COMP_BASE + 4) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_GENERIC (JBR_COMP_BASE + 5) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_INVALID (JBR_COMP_BASE + 6) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_NO_DATA (JBR_COMP_BASE + 7) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_SYS_CALL_FAILURE (JBR_COMP_BASE + 8) - -/*! - * @messageid - * @diagnosis - * @recommendedaction - */ -#define J_MSG_QUORUM_NOT_MET (JBR_COMP_BASE + 9) - -#endif /* _JBR_MESSAGES_H_ */ diff --git a/xlators/experimental/jbr-client/src/jbrc.c b/xlators/experimental/jbr-client/src/jbrc.c deleted file mode 100644 index 9bb9346c5c0..00000000000 --- a/xlators/experimental/jbr-client/src/jbrc.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "call-stub.h" -#include "defaults.h" -#include "timer.h" -#include "xlator.h" -#include "jbr-messages.h" -#include "jbrc.h" -#include "statedump.h" - -#define SCAR_LIMIT 20 -#define HILITE(x) ("[1;33m"x"[0m") - -/* - * The fops are actually generated by gen-fops.py; the rest was mostly copied - * from defaults.c (commit cd253754 on 27 August 2013). - */ - -enum gf_dht_mem_types_ { - gf_mt_jbrc_private_t = gf_common_mt_end + 1, - gf_mt_jbrc_end -}; - -char *JBRC_XATTR = "user.jbr.active"; - -static inline -xlator_t * -ACTIVE_CHILD (xlator_t *parent) -{ - jbrc_private_t *priv = parent->private; - - return priv ? priv->active : FIRST_CHILD(parent); -} - -xlator_t * -next_xlator (xlator_t *this, xlator_t *prev) -{ - xlator_list_t *trav; - - for (trav = this->children; trav; trav = trav->next) { - if (trav->xlator == prev) { - return trav->next ? trav->next->xlator - : this->children->xlator; - } - } - - return NULL; -} - -void -jbrc_retry_cb (void *cb_arg) -{ - jbrc_local_t *local = cb_arg; - - gf_msg (__func__, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retrying %p"), local); - call_resume_wind(local->stub); -} - -#pragma generate - -int32_t -jbrc_forget (xlator_t *this, inode_t *inode) -{ - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement forget_cbk"); - return 0; -} - - -int32_t -jbrc_releasedir (xlator_t *this, fd_t *fd) -{ - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement releasedir_cbk"); - return 0; -} - -int32_t -jbrc_release (xlator_t *this, fd_t *fd) -{ - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement release_cbk"); - return 0; -} - -struct xlator_fops fops = { - .lookup = jbrc_lookup, - .stat = jbrc_stat, - .fstat = jbrc_fstat, - .truncate = jbrc_truncate, - .ftruncate = jbrc_ftruncate, - .access = jbrc_access, - .readlink = jbrc_readlink, - .mknod = jbrc_mknod, - .mkdir = jbrc_mkdir, - .unlink = jbrc_unlink, - .rmdir = jbrc_rmdir, - .symlink = jbrc_symlink, - .rename = jbrc_rename, - .link = jbrc_link, - .create = jbrc_create, - .open = jbrc_open, - .readv = jbrc_readv, - .writev = jbrc_writev, - .flush = jbrc_flush, - .fsync = jbrc_fsync, - .opendir = jbrc_opendir, - .readdir = jbrc_readdir, - .readdirp = jbrc_readdirp, - .fsyncdir = jbrc_fsyncdir, - .statfs = jbrc_statfs, - .setxattr = jbrc_setxattr, - .getxattr = jbrc_getxattr, - .fsetxattr = jbrc_fsetxattr, - .fgetxattr = jbrc_fgetxattr, - .removexattr = jbrc_removexattr, - .fremovexattr = jbrc_fremovexattr, - .lk = jbrc_lk, - .inodelk = jbrc_inodelk, - .finodelk = jbrc_finodelk, - .entrylk = jbrc_entrylk, - .fentrylk = jbrc_fentrylk, - .rchecksum = jbrc_rchecksum, - .xattrop = jbrc_xattrop, - .fxattrop = jbrc_fxattrop, - .setattr = jbrc_setattr, - .fsetattr = jbrc_fsetattr, - .fallocate = jbrc_fallocate, - .discard = jbrc_discard, -}; - -struct xlator_cbks cbks = { -}; - - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("jbrc", this, out); - - ret = xlator_mem_acct_init (this, gf_mt_jbrc_end + 1); - - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, - "Memory accounting init failed"); - return ret; - } -out: - return ret; -} - - -int32_t -jbrc_init (xlator_t *this) -{ - jbrc_private_t *priv = NULL; - xlator_list_t *trav = NULL; - - this->local_pool = mem_pool_new (jbrc_local_t, 128); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, - "failed to create jbrc_local_t pool"); - goto err; - } - - priv = GF_CALLOC (1, sizeof (*priv), gf_mt_jbrc_private_t); - if (!priv) { - goto err; - } - - for (trav = this->children; trav; trav = trav->next) { - ++(priv->n_children); - } - - priv->active = FIRST_CHILD(this); - this->private = priv; - return 0; - -err: - if (priv) { - GF_FREE(priv); - } - return -1; -} - -void -jbrc_fini (xlator_t *this) -{ - GF_FREE(this->private); -} - -int -jbrc_get_child_index (xlator_t *this, xlator_t *kid) -{ - xlator_list_t *trav; - int retval = -1; - - for (trav = this->children; trav; trav = trav->next) { - ++retval; - if (trav->xlator == kid) { - return retval; - } - } - - return -1; -} - -uint8_t -jbrc_count_up_kids (jbrc_private_t *priv) -{ - uint8_t retval = 0; - uint8_t i; - - for (i = 0; i < priv->n_children; ++i) { - if (priv->kid_state & (1 << i)) { - ++retval; - } - } - - return retval; -} - -int32_t -jbrc_notify (xlator_t *this, int32_t event, void *data, ...) -{ - int32_t ret = 0; - int32_t index = 0; - jbrc_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO (THIS->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - switch (event) { - case GF_EVENT_CHILD_UP: - index = jbrc_get_child_index(this, data); - if (index >= 0) { - priv->kid_state |= (1 << index); - priv->up_children = jbrc_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_UP for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - } - ret = default_notify (this, event, data); - break; - case GF_EVENT_CHILD_DOWN: - index = jbrc_get_child_index(this, data); - if (index >= 0) { - priv->kid_state &= ~(1 << index); - priv->up_children = jbrc_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_DOWN for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - } - break; - default: - ret = default_notify (this, event, data); - } - -out: - return ret; -} - -int -jbrc_priv_dump (xlator_t *this) -{ - jbrc_private_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - xlator_list_t *trav = NULL; - int32_t i = -1; - - GF_VALIDATE_OR_GOTO (THIS->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", - this->type, this->name); - gf_proc_dump_add_section(key_prefix); - - gf_proc_dump_write("up_children", "%u", priv->up_children); - - for (trav = this->children, i = 0; trav; trav = trav->next, i++) { - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "child_%d", i); - gf_proc_dump_write(key_prefix, "%s", trav->xlator->name); - } - -out: - return 0; -} - -struct xlator_dumpops dumpops = { - .priv = jbrc_priv_dump, -}; - -class_methods_t class_methods = { - .init = jbrc_init, - .fini = jbrc_fini, - .notify = jbrc_notify, -}; - -struct volume_options options[] = { - { .key = {NULL} }, -}; diff --git a/xlators/experimental/jbr-client/src/jbrc.h b/xlators/experimental/jbr-client/src/jbrc.h deleted file mode 100644 index c83259ca1bd..00000000000 --- a/xlators/experimental/jbr-client/src/jbrc.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _JBRC_H_ -#define _JBRC_H_ - -typedef struct { - xlator_t *active; - uint8_t up_children; - uint8_t n_children; - uint32_t kid_state; -} jbrc_private_t; - -typedef struct { - call_stub_t *stub; - xlator_t *curr_xl; - uint16_t scars; -} jbrc_local_t; - -#endif /* _JBRC_H_ */ diff --git a/xlators/experimental/jbr-server/Makefile.am b/xlators/experimental/jbr-server/Makefile.am deleted file mode 100644 index a985f42a877..00000000000 --- a/xlators/experimental/jbr-server/Makefile.am +++ /dev/null @@ -1,3 +0,0 @@ -SUBDIRS = src - -CLEANFILES = diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am deleted file mode 100644 index 66f73ba8c96..00000000000 --- a/xlators/experimental/jbr-server/src/Makefile.am +++ /dev/null @@ -1,35 +0,0 @@ -xlator_LTLIBRARIES = jbr.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental - -nodist_jbr_la_SOURCES = jbr-cg.c -CLEANFILES = $(nodist_jbr_la_SOURCES) - -jbr_la_LDFLAGS = -module -avoid-version -jbr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ - $(top_builddir)/api/src/libgfapi.la - -noinst_HEADERS = jbr-internal.h \ - $(top_srcdir)/xlators/lib/src/libxlator.h \ - $(top_srcdir)/glusterfsd/src/glusterfsd.h - -AM_CPPFLAGS = $(GF_CPPFLAGS) \ - -I$(top_srcdir)/libglusterfs/src \ - -I$(top_srcdir)/xlators/lib/src \ - -I$(top_srcdir)/rpc/rpc-lib/src -DSBIN_DIR=\"$(sbindir)\" \ - -I$(top_srcdir)/api/src -DJBR_SCRIPT_PREFIX=\"$(jbrdir)\" \ - -I$(top_srcdir)/xlators/experimental/jbr-client/src/ - -AM_CFLAGS = -Wall $(GF_CFLAGS) - -JBR_PREFIX = $(top_srcdir)/xlators/experimental/jbr-server/src -JBR_GEN_FOPS = $(JBR_PREFIX)/gen-fops.py -JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c -JBR_WRAPPER = $(JBR_PREFIX)/jbr.c -noinst_PYTHON = $(JBR_GEN_FOPS) -EXTRA_DIST = $(JBR_TEMPLATES) $(JBR_WRAPPER) - -jbr-cg.c: $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER) - $(PYTHON) $(JBR_GEN_FOPS) $(JBR_TEMPLATES) $(JBR_WRAPPER) > $@ - -uninstall-local: - rm -f $(DESTDIR)$(xlatordir)/jbr.so diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c deleted file mode 100644 index 9b9a3e0be5e..00000000000 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ /dev/null @@ -1,437 +0,0 @@ -/* - * You can put anything here - it doesn't even have to be a comment - and it - * will be ignored until we reach the first template-name comment. - */ - - -/* template-name read-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_private_t *priv = this->private; - gf_boolean_t in_recon = _gf_false; - int32_t recon_term, recon_index; - - /* allow reads during reconciliation * - * TBD: allow "dirty" reads on non-leaders * - */ - if (xdata && - (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && - (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { - in_recon = _gf_true; - } - - if ((!priv->leader) && (in_recon == _gf_false)) { - goto err; - } - - STACK_WIND (frame, default_@NAME@_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; - -err: - STACK_UNWIND_STRICT (@NAME@, frame, -1, EREMOTE, - @ERROR_ARGS@); - return 0; -} - -/* template-name read-dispatch */ -/* No "dispatch" function needed for @NAME@ */ - -/* template-name read-fan-in */ -/* No "fan-in" function needed for @NAME@ */ - -/* template-name read-continue */ -/* No "continue" function needed for @NAME@ */ - -/* template-name read-complete */ -/* No "complete" function needed for @NAME@ */ - -/* template-name write-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = this->private; - gf_boolean_t result = _gf_false; - int op_errno = ENOMEM; - int from_leader; - int from_recon; - uint32_t ti = 0; - - /* - * Our first goal here is to avoid "split brain surprise" for users who - * specify exactly 50% with two- or three-way replication. That means - * either a more-than check against half the total replicas or an - * at-least check against half of our peers (one less). Of the two, - * only an at-least check supports the intuitive use of 100% to mean - * all replicas must be present, because "more than 100%" will never - * succeed regardless of which count we use. This leaves us with a - * slightly non-traditional definition of quorum ("at least X% of peers - * not including ourselves") but one that's useful enough to be worth - * it. - * - * Note that n_children and up_children *do* include the local - * subvolume, so we need to subtract one in each case. - */ - if (priv->leader) { - result = fop_quorum_check (this, (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - - if (result == _gf_false) { - /* Emulate the AFR client-side-quorum behavior. */ - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Sufficient number of " - "subvolumes are not up to meet quorum."); - op_errno = EROFS; - goto err; - } - } else { - if (xdata) { - from_leader = !!dict_get(xdata, JBR_TERM_XATTR); - from_recon = !!dict_get(xdata, RECON_TERM_XATTR) - && !!dict_get(xdata, RECON_INDEX_XATTR); - } else { - from_leader = from_recon = _gf_false; - } - - /* follower/recon path * - * just send it to local node * - */ - if (!from_leader && !from_recon) { - op_errno = EREMOTE; - goto err; - } - } - - local = mem_get0(this->local_pool); - if (!local) { - goto err; - } -#if defined(JBR_CG_NEED_FD) - local->fd = fd_ref(fd); -#else - local->fd = NULL; -#endif - INIT_LIST_HEAD(&local->qlinks); - frame->local = local; - - /* - * If we let it through despite not being the leader, then we just want - * to pass it on down without all of the additional xattrs, queuing, and - * so on. However, jbr_*_complete does depend on the initialization - * immediately above this. - */ - if (!priv->leader) { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; - } - - if (!xdata) { - xdata = dict_new(); - if (!xdata) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - J_MSG_MEM_ERR, "failed to allocate xdata"); - goto err; - } - } - - if (dict_set_int32(xdata, JBR_TERM_XATTR, priv->current_term) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to set jbr-term"); - goto err; - } - - LOCK(&priv->index_lock); - ti = ++(priv->index); - UNLOCK(&priv->index_lock); - if (dict_set_int32(xdata, JBR_INDEX_XATTR, ti) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to set index"); - goto err; - } - - local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; - } - - -#if defined(JBR_CG_QUEUE) - jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); - - if (!ictx) { - op_errno = EIO; - goto err; - } - LOCK(&ictx->lock); - if (ictx->active) { - gf_msg_debug (this->name, 0, - "queuing request due to conflict"); - /* - * TBD: enqueue only for real conflict - * - * Currently we just act like all writes are in - * conflict with one another. What we should really do - * is check the active/pending queues and defer only if - * there's a conflict there. - * - * It's important to check the pending queue because we - * might have an active request X which conflicts with - * a pending request Y, and this request Z might - * conflict with Y but not X. If we checked only the - * active queue then Z could jump ahead of Y, which - * would be incorrect. - */ - local->qstub = fop_@NAME@_stub (frame, - jbr_@NAME@_dispatch, - @SHORT_ARGS@); - if (!local->qstub) { - UNLOCK(&ictx->lock); - goto err; - } - list_add_tail(&local->qlinks, &ictx->pqueue); - ++(ictx->pending); - UNLOCK(&ictx->lock); - return 0; - } else { - list_add_tail(&local->qlinks, &ictx->aqueue); - ++(ictx->active); - } - UNLOCK(&ictx->lock); -#endif - - return jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@); - -err: - if (local) { - if (local->stub) { - call_stub_destroy(local->stub); - } - if (local->qstub) { - call_stub_destroy(local->qstub); - } - if (local->fd) { - fd_unref(local->fd); - } - mem_put(local); - } - STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, - @ERROR_ARGS@); - return 0; -} - -/* template-name write-dispatch */ -int32_t -jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_local_t *local = frame->local; - jbr_private_t *priv = this->private; - xlator_list_t *trav; - - /* - * TBD: unblock pending request(s) if we fail after this point but - * before we get to jbr_@NAME@_complete (where that code currently - * resides). - */ - - local->call_count = priv->n_children - 1; - local->successful_acks = 0; - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND (frame, jbr_@NAME@_fan_in, - trav->xlator, trav->xlator->fops->@NAME@, - @SHORT_ARGS@); - } - - /* TBD: variable Issue count */ - return 0; -} - -/* template-name write-fan-in */ -int32_t -jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - jbr_local_t *local = frame->local; - uint8_t call_count; - - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); - - LOCK(&frame->lock); - call_count = --(local->call_count); - if (op_ret != -1) { - /* Increment the number of successful acks * - * received for the operation. * - */ - (local->successful_acks)++; - local->successful_op_ret = op_ret; - } - gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", - op_ret, op_errno, local->successful_acks); - UNLOCK(&frame->lock); - - /* TBD: variable Completion count */ - if (call_count == 0) { - call_resume(local->stub); - } - - return 0; -} - -/* template-name write-continue */ -int32_t -jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - - /* Perform quorum check to see if the leader needs * - * to perform the operation. If the operation will not * - * meet quorum irrespective of the leader's result * - * there is no point in the leader performing the fop * - */ - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " - "to meet quorum. Failing the operation without trying " - "it on the leader."); - STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, - @ERROR_ARGS@); - } else { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - } - - ret = 0; -out: - return ret; -} - -/* template-name write-complete */ -int32_t -jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - gf_boolean_t result = _gf_false; - jbr_private_t *priv = this->private; - - jbr_local_t *local = frame->local; - - /* If the fop failed on the leader, then reduce one succesful ack - * before calculating the fop quorum - */ - LOCK(&frame->lock); - if (op_ret == -1) - (local->successful_acks)--; - UNLOCK(&frame->lock); - -#if defined(JBR_CG_QUEUE) - jbr_inode_ctx_t *ictx; - jbr_local_t *next; - - if (local->qlinks.next != &local->qlinks) { - list_del(&local->qlinks); - ictx = jbr_get_inode_ctx(this, local->fd->inode); - if (ictx) { - LOCK(&ictx->lock); - if (ictx->pending) { - /* - * TBD: dequeue *all* non-conflicting - * reqs - * - * With the stub implementation there - * can only be one request active at a - * time (zero here) so it's not an - * issue. In a real implementation - * there might still be other active - * requests to check against, and - * multiple pending requests that could - * continue. - */ - gf_msg_debug (this->name, 0, - "unblocking next request"); - --(ictx->pending); - next = list_entry (ictx->pqueue.next, - jbr_local_t, qlinks); - list_del(&next->qlinks); - list_add_tail(&next->qlinks, - &ictx->aqueue); - call_resume(next->qstub); - } else { - --(ictx->active); - } - UNLOCK(&ictx->lock); - } - } -#endif - -#if defined(JBR_CG_FSYNC) - jbr_mark_fd_dirty(this, local); -#endif - -#if defined(JBR_CG_NEED_FD) - fd_unref(local->fd); -#endif - - /* After the leader completes the fop, a quorum check is * - * performed, taking into account the outcome of the fop * - * on the leader. Irrespective of the fop being successful * - * or failing on the leader, the result of the quorum will * - * determine if the overall fop is successful or not. For * - * example, a fop might have succeeded on every node except * - * the leader, in which case as quorum is being met, the fop * - * will be treated as a successful fop, even though it failed * - * on the leader. On follower nodes, no quorum check should * - * be done, and the result is returned to the leader as is. * - */ - if (priv->leader) { - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - op_ret = -1; - op_errno = EROFS; - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Quorum is not met. " - "The operation has failed."); - } else { -#if defined(JBR_CG_NEED_FD) - op_ret = local->successful_op_ret; -#else - op_ret = 0; -#endif - op_errno = 0; - gf_msg_debug (this->name, 0, - "Quorum has met. The operation has succeeded."); - } - } - - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - - - return 0; - -} diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py deleted file mode 100755 index 64cbe4f760e..00000000000 --- a/xlators/experimental/jbr-server/src/gen-fops.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/python - -# This script generates the boilerplate versions of most fops and cbks in the -# server. This allows the details of leadership-status checking, sequencing -# between leader and followers (including fan-out), and basic error checking -# to be centralized one place, with per-operation code kept to a minimum. - -import os -import re -import string -import sys - -curdir = os.path.dirname(sys.argv[0]) -gendir = os.path.join(curdir,'../../../../libglusterfs/src') -sys.path.append(gendir) -from generator import ops, fop_subs, cbk_subs, generate - -# We really want the callback argument list, even when we're generating fop -# code, so we propagate here. -# TBD: this should probably be right in generate.py -for k, v in cbk_subs.iteritems(): - fop_subs[k]['@ERROR_ARGS@'] = v['@ERROR_ARGS@'] - -# Stolen from old codegen.py -def load_templates (path): - templates = {} - tmpl_re = re.compile("/\* template-name (.*) \*/") - templates = {} - t_name = None - for line in open(path,"r").readlines(): - if not line: - break - m = tmpl_re.match(line) - if m: - if t_name: - templates[t_name] = string.join(t_contents,'') - t_name = m.group(1).strip() - t_contents = [] - elif t_name: - t_contents.append(line) - if t_name: - templates[t_name] = string.join(t_contents,'') - return templates - -# We need two types of templates. The first, for pure read operations, just -# needs to do a simple am-i-leader check (augmented to allow dirty reads). -# The second, for pure writes, needs to do fan-out to followers between those -# initial checks and local execution. There are other operations that don't -# fit neatly into either category - e.g. lock ops or fsync - so we'll just have -# to handle those manually. The table thus includes entries only for those we -# can categorize. The special cases, plus any new operations we've never even -# heard of, aren't in there. -# -# Various keywords can be used to define/undefine preprocessor symbols used -# in the templates, on a per-function basis. For example, if the keyword here -# is "fsync" (lowercase word or abbreviation) that will cause JBR_CG_FSYNC -# (prefix plus uppercase version) to be defined above all of the generated code -# for that fop. - -fop_table = { - "access": "read", - "create": "write", - "discard": "write", -# "entrylk": "read", - "fallocate": "write", -# "fentrylk": "read", - "fgetxattr": "read", -# "finodelk": "read", -# "flush": "read", - "fremovexattr": "write", - "fsetattr": "write", - "fsetxattr": "write", - "fstat": "read", -# "fsync": "read", -# "fsyncdir": "read", - "ftruncate": "write", - "fxattrop": "write", - "getxattr": "read", -# "inodelk": "read", - "link": "write", -# "lk": "read", -# "lookup": "read", - "mkdir": "write", - "mknod": "write", - "open": "write", - "opendir": "read", - "rchecksum": "read", - "readdir": "read", - "readdirp": "read", - "readlink": "read", - "readv": "read", - "removexattr": "write", - "rename": "write", - "rmdir": "write", - "setattr": "write", - "setxattr": "write", - "stat": "read", - "statfs": "read", - "symlink": "write", - "truncate": "write", - "unlink": "write", - "writev": "write,fsync,queue", - "xattrop": "write", -} - -# Stolen from gen_fdl.py -def gen_server (templates): - fops_done = [] - for name in fop_table.keys(): - info = fop_table[name].split(",") - kind = info[0] - flags = info[1:] - if ("fsync" in flags) or ("queue" in flags): - flags.append("need_fd") - for fname in flags: - print "#define JBR_CG_%s" % fname.upper() - print generate(templates[kind+"-complete"],name,cbk_subs) - print generate(templates[kind+"-continue"],name,fop_subs) - print generate(templates[kind+"-fan-in"],name,cbk_subs) - print generate(templates[kind+"-dispatch"],name,fop_subs) - print generate(templates[kind+"-fop"],name,fop_subs) - for fname in flags: - print "#undef JBR_CG_%s" % fname.upper() - fops_done.append(name) - # Just for fun, emit the fops table too. - print("struct xlator_fops fops = {") - for x in fops_done: - print(" .%s = jbr_%s,"%(x,x)) - print("};") - -tmpl = load_templates(sys.argv[1]) -for l in open(sys.argv[2],'r').readlines(): - if l.find('#pragma generate') != -1: - print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" - gen_server(tmpl) - print "/* END GENERATED CODE */" - else: - print l[:-1] diff --git a/xlators/experimental/jbr-server/src/jbr-internal.h b/xlators/experimental/jbr-server/src/jbr-internal.h deleted file mode 100644 index ab1dfc16de2..00000000000 --- a/xlators/experimental/jbr-server/src/jbr-internal.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#include <sys/stat.h> -#include <sys/types.h> - -#define LEADER_XATTR "user.jbr.leader" -#define SECOND_CHILD(xl) (xl->children->next->xlator) -#define RECONCILER_PATH JBR_SCRIPT_PREFIX"/reconciler.py" -#define CHANGELOG_ENTRY_SIZE 128 - -enum { - gf_mt_jbr_private_t = gf_common_mt_end + 1, - gf_mt_jbr_fd_ctx_t, - gf_mt_jbr_inode_ctx_t, - gf_mt_jbr_dirty_t, - gf_mt_jbr_end -}; - -typedef enum jbr_recon_notify_ev_id_t { - JBR_RECON_SET_LEADER = 1, - JBR_RECON_ADD_CHILD = 2 -} jbr_recon_notify_ev_id_t; - -typedef struct _jbr_recon_notify_ev_s { - jbr_recon_notify_ev_id_t id; - uint32_t index; /* in case of add */ - struct list_head list; -} jbr_recon_notify_ev_t; - -typedef struct { - /* - * This is a hack to allow a non-leader to accept requests while the - * leader is down, and it only works for n=2. The way it works is that - * "config_leader" indicates the state from our options (via init or - * reconfigure) but "leader" is what the fop code actually looks at. If - * config_leader is true, then leader will *always* be true as well, - * giving that brick precedence. If config_leader is false, then - * leader will only be true if there is no connection to the other - * brick (tracked in jbr_notify). - * - * TBD: implement real leader election - */ - gf_boolean_t config_leader; - gf_boolean_t leader; - uint8_t up_children; - uint8_t n_children; - char *vol_file; - uint32_t current_term; - uint32_t kid_state; - gf_lock_t dirty_lock; - struct list_head dirty_fds; - uint32_t index; - gf_lock_t index_lock; - double quorum_pct; - int term_fd; - long term_total; - long term_read; - /* - * This is a super-duper hack, but it will do for now. The reason it's - * a hack is that we pass this to dict_set_static_bin, so we don't have - * to mess around with allocating and freeing it on every single IPC - * request, but it's totally not thread-safe. On the other hand, there - * should only be one reconciliation thread running and calling these - * functions at a time, so maybe that doesn't matter. - * - * TBD: re-evaluate how to manage this - */ - char term_buf[CHANGELOG_ENTRY_SIZE]; - gf_boolean_t child_up; /* To maintain the state of * - * the translator */ -} jbr_private_t; - -typedef struct { - call_stub_t *stub; - call_stub_t *qstub; - uint32_t call_count; - uint32_t successful_acks; - uint32_t successful_op_ret; - fd_t *fd; - struct list_head qlinks; -} jbr_local_t; - -/* - * This should match whatever changelog returns on the pre-op for us to pass - * when we're ready for our post-op. - */ -typedef uint32_t log_id_t; - -typedef struct { - struct list_head links; - log_id_t id; -} jbr_dirty_list_t; - -typedef struct { - fd_t *fd; - struct list_head dirty_list; - struct list_head fd_list; -} jbr_fd_ctx_t; - -typedef struct { - gf_lock_t lock; - uint32_t active; - struct list_head aqueue; - uint32_t pending; - struct list_head pqueue; -} jbr_inode_ctx_t; - -void jbr_start_reconciler (xlator_t *this); diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c deleted file mode 100644 index 984392c2f87..00000000000 --- a/xlators/experimental/jbr-server/src/jbr.c +++ /dev/null @@ -1,1147 +0,0 @@ -/* - Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <fnmatch.h> -#include "call-stub.h" -#include "defaults.h" -#include "xlator.h" -#include "glfs.h" -#include "glfs-internal.h" -#include "run.h" -#include "common-utils.h" -#include "syncop.h" -#include "syscall.h" -#include "compat-errno.h" - -#include "jbr-internal.h" -#include "jbr-messages.h" - -#define JBR_FLUSH_INTERVAL 5 - -enum { - /* echo "cluster/jbr-server" | md5sum | cut -c 1-8 */ - JBR_SERVER_IPC_BASE = 0x0e2d66a5, - JBR_SERVER_TERM_RANGE, - JBR_SERVER_OPEN_TERM, - JBR_SERVER_NEXT_ENTRY -}; - -/* Used to check the quorum of acks received after the fop - * confirming the status of the fop on all the brick processes - * for this particular subvolume - */ -gf_boolean_t -fop_quorum_check (xlator_t *this, double n_children, - double current_state) -{ - jbr_private_t *priv = NULL; - gf_boolean_t result = _gf_false; - double required = 0; - double current = 0; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - required = n_children * priv->quorum_pct; - - /* - * Before performing the fop on the leader, we need to check, - * if there is any merit in performing the fop on the leader. - * In a case, where even a successful write on the leader, will - * not meet quorum, there is no point in trying the fop on the - * leader. - * When this function is called after the leader has tried - * performing the fop, this check will calculate quorum taking into - * account the status of the fop on the leader. If the leader's - * op_ret was -1, the complete function would account that by - * decrementing successful_acks by 1 - */ - - current = current_state * 100.0; - - if (current < required) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_QUORUM_NOT_MET, - "Quorum not met. quorum_pct = %f " - "Current State = %f, Required State = %f", - priv->quorum_pct, current, - required); - } else - result = _gf_true; - -out: - return result; -} - -jbr_inode_ctx_t * -jbr_get_inode_ctx (xlator_t *this, inode_t *inode) -{ - uint64_t ctx_int = 0LL; - jbr_inode_ctx_t *ctx_ptr; - - if (__inode_ctx_get(inode, this, &ctx_int) == 0) { - ctx_ptr = (jbr_inode_ctx_t *)(long)ctx_int; - } else { - ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), - gf_mt_jbr_inode_ctx_t); - if (ctx_ptr) { - ctx_int = (uint64_t)(long)ctx_ptr; - if (__inode_ctx_set(inode, this, &ctx_int) == 0) { - LOCK_INIT(&ctx_ptr->lock); - INIT_LIST_HEAD(&ctx_ptr->aqueue); - INIT_LIST_HEAD(&ctx_ptr->pqueue); - } else { - GF_FREE(ctx_ptr); - ctx_ptr = NULL; - } - } - - } - - return ctx_ptr; -} - -jbr_fd_ctx_t * -jbr_get_fd_ctx (xlator_t *this, fd_t *fd) -{ - uint64_t ctx_int = 0LL; - jbr_fd_ctx_t *ctx_ptr; - - if (__fd_ctx_get(fd, this, &ctx_int) == 0) { - ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; - } else { - ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), gf_mt_jbr_fd_ctx_t); - if (ctx_ptr) { - if (__fd_ctx_set(fd, this, (uint64_t)ctx_ptr) == 0) { - INIT_LIST_HEAD(&ctx_ptr->dirty_list); - INIT_LIST_HEAD(&ctx_ptr->fd_list); - } else { - GF_FREE(ctx_ptr); - ctx_ptr = NULL; - } - } - - } - - return ctx_ptr; -} - -void -jbr_mark_fd_dirty (xlator_t *this, jbr_local_t *local) -{ - fd_t *fd = local->fd; - jbr_fd_ctx_t *ctx_ptr; - jbr_dirty_list_t *dirty; - jbr_private_t *priv = this->private; - - /* - * TBD: don't do any of this for O_SYNC/O_DIRECT writes. - * Unfortunately, that optimization requires that we distinguish - * between writev and other "write" calls, saving the original flags - * and checking them in the callback. Too much work for too little - * gain right now. - */ - - LOCK(&fd->lock); - ctx_ptr = jbr_get_fd_ctx(this, fd); - dirty = GF_CALLOC(1, sizeof(*dirty), gf_mt_jbr_dirty_t); - if (ctx_ptr && dirty) { - gf_msg_trace (this->name, 0, - "marking fd %p as dirty (%p)", fd, dirty); - /* TBD: fill dirty->id from what changelog gave us */ - list_add_tail(&dirty->links, &ctx_ptr->dirty_list); - if (list_empty(&ctx_ptr->fd_list)) { - /* Add a ref so _release doesn't get called. */ - ctx_ptr->fd = fd_ref(fd); - LOCK(&priv->dirty_lock); - list_add_tail (&ctx_ptr->fd_list, - &priv->dirty_fds); - UNLOCK(&priv->dirty_lock); - } - } else { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - J_MSG_MEM_ERR, "could not mark %p dirty", fd); - if (ctx_ptr) { - GF_FREE(ctx_ptr); - } - if (dirty) { - GF_FREE(dirty); - } - } - UNLOCK(&fd->lock); -} - -#define JBR_TERM_XATTR "trusted.jbr.term" -#define JBR_INDEX_XATTR "trusted.jbr.index" -#define JBR_REP_COUNT_XATTR "trusted.jbr.rep-count" -#define RECON_TERM_XATTR "trusted.jbr.recon-term" -#define RECON_INDEX_XATTR "trusted.jbr.recon-index" - -#pragma generate - -uint8_t -jbr_count_up_kids (jbr_private_t *priv) -{ - uint8_t retval = 0; - uint8_t i; - - for (i = 0; i < priv->n_children; ++i) { - if (priv->kid_state & (1 << i)) { - ++retval; - } - } - - return retval; -} - -/* - * The fsync machinery looks a lot like that for any write call, but there are - * some important differences that are easy to miss. First, we don't care - * about the xdata that shows whether the call came from a leader or - * reconciliation process. If we're the leader we fan out; if we're not we - * don't. Second, we don't wait for followers before we issue the local call. - * The code generation system could be updated to handle this, and still might - * if we need to implement other "almost identical" paths (e.g. for open), but - * a copy is more readable as long as it's just one. - */ - -int32_t -jbr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - jbr_local_t *local = frame->local; - gf_boolean_t unwind; - - LOCK(&frame->lock); - unwind = !--(local->call_count); - UNLOCK(&frame->lock); - - if (unwind) { - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - } - return 0; -} - -int32_t -jbr_fsync_local_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - jbr_dirty_list_t *dirty; - jbr_dirty_list_t *dtmp; - jbr_local_t *local = frame->local; - - list_for_each_entry_safe (dirty, dtmp, &local->qlinks, links) { - gf_msg_trace (this->name, 0, - "sending post-op on %p (%p)", local->fd, dirty); - GF_FREE(dirty); - } - - return jbr_fsync_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, xdata); -} - -int32_t -jbr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) -{ - jbr_private_t *priv = this->private; - jbr_local_t *local; - uint64_t ctx_int = 0LL; - jbr_fd_ctx_t *ctx_ptr; - xlator_list_t *trav; - - local = mem_get0(this->local_pool); - if (!local) { - STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, - NULL, NULL, xdata); - return 0; - } - INIT_LIST_HEAD(&local->qlinks); - frame->local = local; - - /* Move the dirty list from the fd to the fsync request. */ - LOCK(&fd->lock); - if (__fd_ctx_get(fd, this, &ctx_int) == 0) { - ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; - list_splice_init (&ctx_ptr->dirty_list, - &local->qlinks); - } - UNLOCK(&fd->lock); - - /* Issue the local call. */ - local->call_count = priv->leader ? priv->n_children : 1; - STACK_WIND (frame, jbr_fsync_local_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, - fd, flags, xdata); - - /* Issue remote calls if we're the leader. */ - if (priv->leader) { - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND (frame, jbr_fsync_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, - fd, flags, xdata); - } - } - - return 0; -} - -int32_t -jbr_getxattr_special (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) -{ - dict_t *result; - jbr_private_t *priv = this->private; - - if (!priv->leader) { - STACK_UNWIND_STRICT (getxattr, frame, -1, EREMOTE, NULL, NULL); - return 0; - } - - if (!name || (strcmp(name, JBR_REP_COUNT_XATTR) != 0)) { - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, name, xdata); - return 0; - } - - result = dict_new(); - if (!result) { - goto dn_failed; - } - - priv->up_children = jbr_count_up_kids(this->private); - if (dict_set_uint32(result, JBR_REP_COUNT_XATTR, - priv->up_children) != 0) { - goto dsu_failed; - } - - STACK_UNWIND_STRICT (getxattr, frame, 0, 0, result, NULL); - dict_destroy(result); - return 0; - -dsu_failed: - dict_destroy(result); -dn_failed: - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); - return 0; -} - -void -jbr_flush_fd (xlator_t *this, jbr_fd_ctx_t *fd_ctx) -{ - jbr_dirty_list_t *dirty; - jbr_dirty_list_t *dtmp; - - list_for_each_entry_safe (dirty, dtmp, &fd_ctx->dirty_list, links) { - gf_msg_trace (this->name, 0, - "sending post-op on %p (%p)", fd_ctx->fd, dirty); - GF_FREE(dirty); - } - - INIT_LIST_HEAD(&fd_ctx->dirty_list); -} - -void * -jbr_flush_thread (void *ctx) -{ - xlator_t *this = ctx; - jbr_private_t *priv = this->private; - struct list_head dirty_fds; - jbr_fd_ctx_t *fd_ctx; - jbr_fd_ctx_t *fd_tmp; - int ret; - - for (;;) { - /* - * We have to be very careful to avoid lock inversions here, so - * we can't just hold priv->dirty_lock while we take and - * release locks for each fd. Instead, we only hold dirty_lock - * at the beginning of each iteration, as we (effectively) make - * a copy of the current list head and then clear the original. - * This leads to four scenarios for adding the first entry to - * an fd and potentially putting it on the global list. - * - * (1) While we're asleep. No lock contention, it just gets - * added and will be processed on the next iteration. - * - * (2) After we've made a local copy, but before we've started - * processing that fd. The new entry will be added to the - * fd (under its lock), and we'll process it on the current - * iteration. - * - * (3) While we're processing the fd. They'll block on the fd - * lock, then see that the list is empty and put it on the - * global list. We'll process it here on the next - * iteration. - * - * (4) While we're working, but after we've processed that fd. - * Same as (1) as far as that fd is concerned. - */ - INIT_LIST_HEAD(&dirty_fds); - LOCK(&priv->dirty_lock); - list_splice_init(&priv->dirty_fds, &dirty_fds); - UNLOCK(&priv->dirty_lock); - - list_for_each_entry_safe (fd_ctx, fd_tmp, &dirty_fds, fd_list) { - ret = syncop_fsync(FIRST_CHILD(this), fd_ctx->fd, 0, - NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to fsync %p (%d)", - fd_ctx->fd, -ret); - } - - LOCK(&fd_ctx->fd->lock); - jbr_flush_fd(this, fd_ctx); - list_del_init(&fd_ctx->fd_list); - UNLOCK(&fd_ctx->fd->lock); - fd_unref(fd_ctx->fd); - } - - sleep(JBR_FLUSH_INTERVAL); - } - - return NULL; -} - - -int32_t -jbr_get_changelog_dir (xlator_t *this, char **cl_dir_p) -{ - xlator_t *cl_xl; - - /* Find our changelog translator. */ - cl_xl = this; - while (cl_xl) { - if (strcmp(cl_xl->type, "features/changelog") == 0) { - break; - } - cl_xl = cl_xl->children->xlator; - } - if (!cl_xl) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INIT_FAIL, - "failed to find changelog translator"); - return ENOENT; - } - - /* Find the actual changelog directory. */ - if (dict_get_str(cl_xl->options, "changelog-dir", cl_dir_p) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INIT_FAIL, - "failed to find changelog-dir for %s", cl_xl->name); - return ENODATA; - } - - return 0; -} - - -void -jbr_get_terms (call_frame_t *frame, xlator_t *this) -{ - int32_t op_errno; - char *cl_dir; - DIR *fp = NULL; - struct dirent *rd_entry; - struct dirent *rd_result; - int32_t term_first = -1; - int32_t term_contig = -1; - int32_t term_last = -1; - int term_num; - char *probe_str; - dict_t *my_xdata = NULL; - - op_errno = jbr_get_changelog_dir(this, &cl_dir); - if (op_errno) { - goto err; /* Error was already logged. */ - } - op_errno = ENODATA; /* Most common error after this. */ - - rd_entry = alloca (offsetof(struct dirent, d_name) + - pathconf(cl_dir, _PC_NAME_MAX) + 1); - if (!rd_entry) { - goto err; - } - - fp = sys_opendir (cl_dir); - if (!fp) { - op_errno = errno; - goto err; - } - - /* Find first and last terms. */ - for (;;) { - if (readdir_r(fp, rd_entry, &rd_result) != 0) { - op_errno = errno; - goto err; - } - if (!rd_result) { - break; - } - if (fnmatch("TERM.*", rd_entry->d_name, FNM_PATHNAME) != 0) { - continue; - } - /* +5 points to the character after the period */ - term_num = atoi(rd_entry->d_name+5); - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, - "%s => %d", rd_entry->d_name, term_num); - if (term_num < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INVALID, - "invalid term file name %s", rd_entry->d_name); - op_errno = EINVAL; - goto err; - } - if ((term_first < 0) || (term_first > term_num)) { - term_first = term_num; - } - if ((term_last < 0) || (term_last < term_num)) { - term_last = term_num; - } - } - if ((term_first < 0) || (term_last < 0)) { - /* TBD: are we *sure* there should always be at least one? */ - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "no terms found"); - op_errno = EINVAL; - goto err; - } - - sys_closedir (fp); - fp = NULL; - - /* - * Find term_contig, which is the earliest term for which there are - * no gaps between it and term_last. - */ - for (term_contig = term_last; term_contig > 0; --term_contig) { - if (gf_asprintf(&probe_str, "%s/TERM.%d", - cl_dir, term_contig-1) <= 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, - "failed to format term %d", term_contig-1); - goto err; - } - if (sys_access(probe_str, F_OK) != 0) { - GF_FREE(probe_str); - break; - } - GF_FREE(probe_str); - } - - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, - "found terms %d-%d (%d)", - term_first, term_last, term_contig); - - /* Return what we've found */ - my_xdata = dict_new(); - if (!my_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, - "failed to allocate reply dictionary"); - goto err; - } - if (dict_set_int32(my_xdata, "term-first", term_first) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-first"); - goto err; - } - if (dict_set_int32(my_xdata, "term-contig", term_contig) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-contig"); - goto err; - } - if (dict_set_int32(my_xdata, "term-last", term_last) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-last"); - goto err; - } - - /* Finally! */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata); - dict_unref(my_xdata); - return; - -err: - if (fp) { - sys_closedir (fp); - } - if (my_xdata) { - dict_unref(my_xdata); - } - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); -} - - -long -get_entry_count (xlator_t *this, int fd) -{ - struct stat buf; - long min; /* last entry not known to be empty */ - long max; /* first entry known to be empty */ - long curr; - char entry[CHANGELOG_ENTRY_SIZE]; - - if (sys_fstat (fd, &buf) < 0) { - return -1; - } - - min = 0; - max = buf.st_size / CHANGELOG_ENTRY_SIZE; - - while ((min+1) < max) { - curr = (min + max) / 2; - if (sys_lseek(fd, curr*CHANGELOG_ENTRY_SIZE, SEEK_SET) < 0) { - return -1; - } - if (sys_read(fd, entry, sizeof(entry)) != sizeof(entry)) { - return -1; - } - if ((entry[0] == '_') && (entry[1] == 'P')) { - min = curr; - } else { - max = curr; - } - } - - if (sys_lseek(fd, 0, SEEK_SET) < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to reset offset"); - } - return max; -} - - -void -jbr_open_term (call_frame_t *frame, xlator_t *this, dict_t *xdata) -{ - int32_t op_errno; - char *cl_dir; - char *term; - char *path; - jbr_private_t *priv = this->private; - - op_errno = jbr_get_changelog_dir(this, &cl_dir); - if (op_errno) { - goto err; - } - - if (dict_get_str(xdata, "term", &term) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "missing term"); - op_errno = ENODATA; - goto err; - } - - if (gf_asprintf(&path, "%s/TERM.%s", cl_dir, term) < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, "failed to construct path"); - op_errno = ENOMEM; - goto err; - } - - if (priv->term_fd >= 0) { - sys_close (priv->term_fd); - } - priv->term_fd = open(path, O_RDONLY); - if (priv->term_fd < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to open term file"); - goto err; - } - - priv->term_total = get_entry_count(this, priv->term_fd); - if (priv->term_total < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "failed to get entry count"); - sys_close (priv->term_fd); - priv->term_fd = -1; - op_errno = EIO; - goto err; - } - priv->term_read = 0; - - /* Success! */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - return; - -err: - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); -} - - -void -jbr_next_entry (call_frame_t *frame, xlator_t *this) -{ - int32_t op_errno = ENOMEM; - jbr_private_t *priv = this->private; - ssize_t nbytes; - dict_t *my_xdata; - - if (priv->term_fd < 0) { - op_errno = EBADFD; - goto err; - } - - if (priv->term_read >= priv->term_total) { - op_errno = ENODATA; - goto err; - } - - nbytes = sys_read (priv->term_fd, priv->term_buf, CHANGELOG_ENTRY_SIZE); - if (nbytes < CHANGELOG_ENTRY_SIZE) { - if (nbytes < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "error reading next entry: %s", - strerror(errno)); - } else { - op_errno = EIO; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "got %ld/%d bytes for next entry", - nbytes, CHANGELOG_ENTRY_SIZE); - } - goto err; - } - ++(priv->term_read); - - my_xdata = dict_new(); - if (!my_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, "failed to allocate reply xdata"); - goto err; - } - - if (dict_set_static_bin(my_xdata, "data", - priv->term_buf, CHANGELOG_ENTRY_SIZE) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to assign reply xdata"); - goto err; - } - - STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata); - dict_unref(my_xdata); - return; - -err: - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); -} - - -int32_t -jbr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - switch (op) { - case JBR_SERVER_TERM_RANGE: - jbr_get_terms(frame, this); - break; - case JBR_SERVER_OPEN_TERM: - jbr_open_term(frame, this, xdata); - break; - case JBR_SERVER_NEXT_ENTRY: - jbr_next_entry(frame, this); - break; - default: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - op, xdata); - } - - return 0; -} - - -int32_t -jbr_forget (xlator_t *this, inode_t *inode) -{ - uint64_t ctx = 0LL; - - if ((inode_ctx_del(inode, this, &ctx) == 0) && ctx) { - GF_FREE((void *)(long)ctx); - } - - return 0; -} - -int32_t -jbr_release (xlator_t *this, fd_t *fd) -{ - uint64_t ctx = 0LL; - - if ((fd_ctx_del(fd, this, &ctx) == 0) && ctx) { - GF_FREE((void *)(long)ctx); - } - - return 0; -} - -struct xlator_cbks cbks = { - .forget = jbr_forget, - .release = jbr_release, -}; - -int -jbr_reconfigure (xlator_t *this, dict_t *options) -{ - jbr_private_t *priv = this->private; - - GF_OPTION_RECONF ("leader", - priv->config_leader, options, bool, err); - GF_OPTION_RECONF ("quorum-percent", - priv->quorum_pct, options, percent, err); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "reconfigure called, config_leader = %d, quorum_pct = %.1f\n", - priv->leader, priv->quorum_pct); - - priv->leader = priv->config_leader; - - return 0; - -err: - return -1; -} - -int -jbr_get_child_index (xlator_t *this, xlator_t *kid) -{ - xlator_list_t *trav; - int retval = -1; - - for (trav = this->children; trav; trav = trav->next) { - ++retval; - if (trav->xlator == kid) { - return retval; - } - } - - return -1; -} - -/* - * Child notify handling is unreasonably FUBAR. Sometimes we'll get a - * CHILD_DOWN for a protocol/client child before we ever got a CHILD_UP for it. - * Other times we won't. Because it's effectively random (probably racy), we - * can't just maintain a count. We actually have to keep track of the state - * for each child separately, to filter out the bogus CHILD_DOWN events, and - * then generate counts on demand. - */ -int -jbr_notify (xlator_t *this, int event, void *data, ...) -{ - jbr_private_t *priv = this->private; - int index = -1; - int ret = -1; - gf_boolean_t result = _gf_false; - gf_boolean_t relevant = _gf_false; - - switch (event) { - case GF_EVENT_CHILD_UP: - index = jbr_get_child_index(this, data); - if (index >= 0) { - /* Check if the child was previously down - * and it's not a false CHILD_UP - */ - if (!(priv->kid_state & (1 << index))) { - relevant = _gf_true; - } - - priv->kid_state |= (1 << index); - priv->up_children = jbr_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_UP for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - if (!priv->config_leader && (priv->up_children > 1)) { - priv->leader = _gf_false; - } - - /* If it's not relevant, or we have already * - * sent CHILD_UP just break */ - if (!relevant || priv->child_up) - break; - - /* If it's not a leader, just send the notify up */ - if (!priv->leader) { - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_true; - break; - } - - result = fop_quorum_check (this, - (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Not enough children " - "are up to meet quorum. Waiting to " - "send CHILD_UP from leader"); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Enough children are up " - "to meet quorum. Sending CHILD_UP " - "from leader"); - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_true; - } - } - break; - case GF_EVENT_CHILD_DOWN: - index = jbr_get_child_index(this, data); - if (index >= 0) { - /* Check if the child was previously up - * and it's not a false CHILD_DOWN - */ - if (priv->kid_state & (1 << index)) { - relevant = _gf_true; - } - priv->kid_state &= ~(1 << index); - priv->up_children = jbr_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_DOWN for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - if (!priv->config_leader && (priv->up_children < 2) - && relevant) { - priv->leader = _gf_true; - } - - /* If it's not relevant, or we have already * - * sent CHILD_DOWN just break */ - if (!relevant || !priv->child_up) - break; - - /* If it's not a leader, just break coz we shouldn't * - * propagate the failure from the failure till it * - * itself goes down * - */ - if (!priv->leader) { - break; - } - - result = fop_quorum_check (this, - (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Enough children are " - "to down to fail quorum. " - "Sending CHILD_DOWN from leader"); - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_false; - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Not enough children " - "are down to fail quorum. Waiting to " - "send CHILD_DOWN from leader"); - } - } - break; - default: - ret = default_notify(this, event, data); - } - - return ret; -} - - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - - ret = xlator_mem_acct_init (this, gf_mt_jbr_end + 1); - - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "Memory accounting init" "failed"); - return ret; - } -out: - return ret; -} - - -void -jbr_deallocate_priv (jbr_private_t *priv) -{ - if (!priv) { - return; - } - - GF_FREE(priv); -} - - -int32_t -jbr_init (xlator_t *this) -{ - xlator_list_t *remote; - xlator_list_t *local; - jbr_private_t *priv = NULL; - xlator_list_t *trav; - pthread_t kid; - extern xlator_t global_xlator; - glusterfs_ctx_t *oldctx = global_xlator.ctx; - - /* - * Any fop that gets special treatment has to be patched in here, - * because the compiled-in table is produced by the code generator and - * only contains generated functions. Note that we have to go through - * this->fops because of some dynamic-linking strangeness; modifying - * the static table doesn't work. - */ - this->fops->getxattr = jbr_getxattr_special; - this->fops->fsync = jbr_fsync; - this->fops->ipc = jbr_ipc; - - local = this->children; - if (!local) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, - "no local subvolume"); - goto err; - } - - remote = local->next; - if (!remote) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, - "no remote subvolumes"); - goto err; - } - - this->local_pool = mem_pool_new (jbr_local_t, 128); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "failed to create jbr_local_t pool"); - goto err; - } - - priv = GF_CALLOC (1, sizeof(*priv), gf_mt_jbr_private_t); - if (!priv) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "could not allocate priv"); - goto err; - } - - for (trav = this->children; trav; trav = trav->next) { - ++(priv->n_children); - } - - LOCK_INIT(&priv->dirty_lock); - LOCK_INIT(&priv->index_lock); - INIT_LIST_HEAD(&priv->dirty_fds); - priv->term_fd = -1; - - this->private = priv; - - GF_OPTION_INIT ("leader", priv->config_leader, bool, err); - GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err); - - priv->leader = priv->config_leader; - priv->child_up = _gf_false; - - if (pthread_create(&kid, NULL, jbr_flush_thread, - this) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE, - "could not start flush thread"); - /* TBD: treat this as a fatal error? */ - } - - /* - * Calling glfs_new changes old->ctx, even if THIS still points - * to global_xlator. That causes problems later in the main - * thread, when gf_log_dump_graph tries to use the FILE after - * we've mucked with it and gets a segfault in __fprintf_chk. - * We can avoid all that by undoing the damage before we - * continue. - */ - global_xlator.ctx = oldctx; - - return 0; - -err: - jbr_deallocate_priv(priv); - return -1; -} - - -void -jbr_fini (xlator_t *this) -{ - jbr_deallocate_priv(this->private); -} - -class_methods_t class_methods = { - .init = jbr_init, - .fini = jbr_fini, - .reconfigure = jbr_reconfigure, - .notify = jbr_notify, -}; - -struct volume_options options[] = { - { .key = {"leader"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "false", - .description = "Start in the leader role. This is only for " - "bootstrapping the code, and should go away when we " - "have real leader election." - }, - { .key = {"vol-name"}, - .type = GF_OPTION_TYPE_STR, - .description = "volume name" - }, - { .key = {"my-name"}, - .type = GF_OPTION_TYPE_STR, - .description = "brick name in form of host:/path" - }, - { .key = {"etcd-servers"}, - .type = GF_OPTION_TYPE_STR, - .description = "list of comma seperated etc servers" - }, - { .key = {"subvol-uuid"}, - .type = GF_OPTION_TYPE_STR, - .description = "UUID for this JBR (sub)volume" - }, - { .key = {"quorum-percent"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "50.0", - .description = "percentage of rep_count-1 that must be up" - }, - { .key = {NULL} }, -}; diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 541ff40ee0c..505f13afd82 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -115,19 +115,6 @@ xlator_instantiate_va (const char *type, const char *format, va_list arg) return NULL; } -static xlator_t * -xlator_instantiate (const char *type, const char *format, ...) -{ - va_list arg; - xlator_t *xl; - - va_start (arg, format); - xl = xlator_instantiate_va (type, format, arg); - va_end (arg); - - return xl; -} - static int volgen_xlator_link (xlator_t *pxl, xlator_t *cxl) { @@ -1759,30 +1746,6 @@ out: return ret; } -/* Add this before (above) io-threads because it's not thread-safe yet. */ -static int -brick_graph_add_fdl (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *set_dict, glusterd_brickinfo_t *brickinfo) -{ - - xlator_t *xl = NULL; - int ret = -1; - - if (!graph || !volinfo || !set_dict) - goto out; - - if (dict_get_str_boolean (set_dict, "features.fdl", 0)) { - xl = volgen_graph_add (graph, "experimental/fdl", - volinfo->volname); - if (!xl) - goto out; - } - ret = 0; - -out: - return ret; -} - static int brick_graph_add_iot (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -1851,79 +1814,6 @@ add_one_peer (volgen_graph_t *graph, glusterd_brickinfo_t *peer, return kid; } -int -add_jbr_stuff (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - xlator_t *me; - glusterd_brickinfo_t *peer; - glusterd_brickinfo_t *prev_peer; - char *leader_opt; - uint16_t index = 0; - xlator_t *kid; - - /* Create the JBR xlator, but defer linkage for now. */ - me = xlator_instantiate ("experimental/jbr", "%s-jbr", - volinfo->volname); - if (!me || volgen_xlator_link(me, first_of(graph))) { - return -1; - } - - /* Figure out if we should start as leader, mark appropriately. */ - peer = list_prev (brickinfo, &volinfo->bricks, - glusterd_brickinfo_t, brick_list); - leader_opt = (!peer || (peer->group != brickinfo->group)) ? "yes" - : "no"; - if (xlator_set_option(me, "leader", leader_opt)) { - /* - * TBD: fix memory leak ("me" and associated dictionary) - * There seems to be no function already to clean up a - * just-allocated translator object if something else fails. - * Apparently the convention elsewhere in this file is to return - * without freeing anything, but we can't keep being that sloppy - * forever. - */ - return -1; - } - - /* - * Make sure we're at the beginning of the list of bricks in this - * replica set. This way all bricks' volfiles have peers in a - * consistent order. - */ - peer = brickinfo; - for (;;) { - prev_peer = list_prev (peer, &volinfo->bricks, - glusterd_brickinfo_t, brick_list); - if (!prev_peer || (prev_peer->group != brickinfo->group)) { - break; - } - peer = prev_peer; - } - - /* Actually add the peers. */ - do { - if (peer != brickinfo) { - gf_log ("glusterd", GF_LOG_INFO, - "%s:%s needs client for %s:%s", - brickinfo->hostname, brickinfo->path, - peer->hostname, peer->path); - kid = add_one_peer (graph, peer, - volinfo->volname, index++); - if (!kid || volgen_xlator_link(me, kid)) { - return -1; - } - } - peer = list_next (peer, &volinfo->bricks, - glusterd_brickinfo_t, brick_list); - } while (peer && (peer->group == brickinfo->group)); - - /* Finish linkage to client file. */ - glusterfs_graph_set_first(&graph->graph, me); - - return 0; -} - static int brick_graph_add_index (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -1936,11 +1826,6 @@ brick_graph_add_index (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (!graph || !volinfo || !brickinfo || !set_dict) goto out; - /* For JBR we don't need/want index. */ - if (glusterd_volinfo_get_boolean(volinfo, "cluster.jbr") > 0) { - return add_jbr_stuff (graph, volinfo, brickinfo); - } - xl = volgen_graph_add (graph, "features/index", volinfo->volname); if (!xl) goto out; @@ -2384,7 +2269,6 @@ static volgen_brick_xlator_t server_graph_table[] = { {brick_graph_add_index, "index"}, {brick_graph_add_barrier, NULL}, {brick_graph_add_marker, "marker"}, - {brick_graph_add_fdl, "fdl"}, {brick_graph_add_iot, "io-threads"}, {brick_graph_add_upcall, "upcall"}, {brick_graph_add_pump, NULL}, @@ -3450,18 +3334,12 @@ volgen_graph_build_afr_clusters (volgen_graph_t *graph, int i = 0; int ret = 0; int clusters = 0; - char *replicate_type = NULL; + char *replicate_type = "cluster/replicate"; char *replicate_name = "%s-replicate-%d"; xlator_t *afr = NULL; char option[32] = {0}; int start_count = 0; - if (glusterd_volinfo_get_boolean(volinfo, "cluster.jbr") > 0) { - replicate_type = "experimental/jbrc"; - } else { - replicate_type = "cluster/replicate"; - } - if (volinfo->tier_info.cold_type == GF_CLUSTER_TYPE_REPLICATE) start_count = volinfo->tier_info.cold_brick_count / volinfo->tier_info.cold_replica_count; @@ -5189,22 +5067,6 @@ get_parent_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo) PATH_MAX - strlen(filename) - 1); } -void -assign_jbr_uuids (glusterd_volinfo_t *volinfo) -{ - glusterd_brickinfo_t *brickinfo = NULL; - int in_group = 0; - uuid_t tmp_uuid; - - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (in_group == 0) - gf_uuid_generate(tmp_uuid); - gf_uuid_copy(brickinfo->jbr_uuid, tmp_uuid); - if (++in_group >= volinfo->replica_count) - in_group = 0; - } -} - int generate_brick_volfiles (glusterd_volinfo_t *volinfo) { @@ -5273,10 +5135,6 @@ generate_brick_volfiles (glusterd_volinfo_t *volinfo) } } - if (glusterd_volinfo_get_boolean(volinfo, "cluster.jbr") > 0) { - assign_jbr_uuids(volinfo); - } - ret = glusterd_volume_brick_for_each (volinfo, NULL, glusterd_generate_brick_volfile); if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 0600290cbe0..134ea8639f0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2929,28 +2929,7 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_7_6, .flags = OPT_FLAG_CLIENT_OPT }, - { .key = "cluster.jbr", - .voltype = "experimental/jbr", - .option = "!jbr", - .op_version = GD_OP_VERSION_4_0_0, - .description = "enable JBR instead of AFR for replication", - .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT - }, - { .key = "cluster.jbr.quorum-percent", - .voltype = "experimental/jbr", - .option = "quorum-percent", - .op_version = GD_OP_VERSION_4_0_0, - .description = "percent of rep_count-1 bricks that must be up" - }, /* Full Data Logging */ - { - .key = "features.fdl", - .voltype = "features/fdl", - .option = "!fdl", - .op_version = GD_OP_VERSION_4_0_0, - .flags = OPT_FLAG_XLATOR_OPT, - .type = NO_DOC, - }, { .key = "cluster.shd-max-threads", .voltype = "cluster/replicate", .op_version = GD_OP_VERSION_3_7_12, diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 3c7f27bb03a..9889565e2b7 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -216,7 +216,6 @@ struct glusterd_brickinfo { * a replica 3 volume with arbiter enabled. */ uint16_t group; - uuid_t jbr_uuid; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; |